From: Fan Yong Date: Sat, 13 Sep 2014 20:22:41 +0000 (+0800) Subject: LU-1453 scrub: auto trigger OI scrub more flexible X-Git-Tag: 2.6.91~39 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=2c5b57ca3aaf1d27a24bce028220355a005dde3d LU-1453 scrub: auto trigger OI scrub more flexible Generally, scanning the whole device for the OI scrub routine check may take a long time. If the whole system contains only a few bad OI mappings, then it is not worth triggering OI scrub automatically at full speed when a bad OI mapping is auto-detected. Instead, we can make the OI scrub fix only the bad OI mappings that were found, and if more and more bad OI mappings are found, so that they exceed a given threshold that can be adjusted via a proc interface, then the OI scrub will run at full speed to scan the whole device. Currently, we offer two kinds of thresholds for triggering OI scrub to scan the whole device: 1) The ratio of "the total OI mappings count" to "the bad OI mappings count". If this ratio is lower than the given threshold, which can be set via the proc interface "full_scrub_ratio", then trigger urgent mode OI scrub. 2) "The speed at which bad OI mappings are found". If the speed exceeds the given threshold, which can be adjusted via the proc interface "full_scrub_speed", then trigger urgent mode OI scrub. 
Test-Parameters: mdsfilesystemtype=ldiskfs mdtfilesystemtype=ldiskfs \ ostfilesystemtype=ldiskfs envdefinitions=ONLY=4 testlist=sanity-scrub Signed-off-by: Fan Yong Change-Id: Ibc4592fef1da11994ec30eb348d20576be5ae54b Reviewed-on: http://review.whamcloud.com/12738 Tested-by: Jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Lai Siyao --- diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index 0602184..3eb178b 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -633,7 +633,11 @@ trigger: if (thread_is_running(&scrub->os_thread)) { result = -EINPROGRESS; } else if (!dev->od_noscrub) { - result = osd_scrub_start(dev); + /* Since we do not know the right OI mapping, + * we have to trigger OI scrub to scan the + * whole device. */ + result = osd_scrub_start(dev, SS_AUTO_FULL | + SS_CLEAR_DRYRUN | SS_CLEAR_FAILOUT); LCONSOLE_WARN("%.16s: trigger OI scrub by RPC " "for "DFID", rc = %d [1]\n", osd_name(dev), PFID(fid),result); @@ -4159,12 +4163,13 @@ again: } if (!dev->od_noscrub && ++once == 1) { - rc = osd_scrub_start(dev); + rc = osd_scrub_start(dev, SS_AUTO_PARTIAL | SS_CLEAR_DRYRUN | + SS_CLEAR_FAILOUT); LCONSOLE_WARN("%.16s: trigger OI scrub by RPC for "DFID ", rc = %d [2]\n", LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name, PFID(fid), rc); - if (rc == 0) + if (rc == 0 || rc == -EALREADY) goto again; } @@ -6087,6 +6092,8 @@ static int osd_device_init0(const struct lu_env *env, if (server_name_is_ost(o->od_svname)) o->od_is_ost = 1; + o->od_full_scrub_ratio = OFSR_DEFAULT; + o->od_full_scrub_speed = FULL_SCRUB_SPEED_DEFULT; rc = osd_mount(env, o, cfg); if (rc != 0) GOTO(out_capa, rc); diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index 12e759f..6d92a31 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -277,8 +277,31 @@ struct osd_device { /* osd seq instance */ struct lu_client_seq *od_cl_seq; + /* If the ratio of 
"the total OI mappings count" vs + * "the bad OI mappings count" is lower than the + * osd_device::od_full_scrub_ratio, then trigger + * OI scrub to scan the whole the device. */ + __u64 od_full_scrub_ratio; + /* If the speed of found bad OI mappings (per minute) + * exceeds the osd_device::od_full_scrub_speed, then + * trigger OI scrub to scan the whole the device. */ + __u64 od_full_scrub_speed; }; +enum osd_full_scrub_ratio { + /* Trigger OI scrub to scan the whole device directly. */ + OFSR_DIRECTLY = 0, + + /* Because the bad OI mappings count cannot be larger than + * the total OI mappints count, then setting OFSR_NEVER means + * that the whole device scanning cannot be triggered by auto + * detected bad OI mappings during the RPC services. */ + OFSR_NEVER = 1, + OFSR_DEFAULT = 10000, +}; + +#define FULL_SCRUB_SPEED_DEFULT 60 + /* There are at most 10 uid/gids are affected in a transaction, and * that's rename case: * - 2 for source parent uid & gid; @@ -670,7 +693,7 @@ int osd_obj_spec_update(struct osd_thread_info *info, struct osd_device *osd, void osd_scrub_file_reset(struct osd_scrub *scrub, __u8 *uuid, __u64 flags); int osd_scrub_file_store(struct osd_scrub *scrub); char *osd_lf_fid2name(const struct lu_fid *fid); -int osd_scrub_start(struct osd_device *dev); +int osd_scrub_start(struct osd_device *dev, __u32 flags); int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev); void osd_scrub_cleanup(const struct lu_env *env, struct osd_device *dev); int osd_oii_insert(struct osd_device *dev, struct osd_idmap_cache *oic, diff --git a/lustre/osd-ldiskfs/osd_lproc.c b/lustre/osd-ldiskfs/osd_lproc.c index 1c534b1..c46ef96 100644 --- a/lustre/osd-ldiskfs/osd_lproc.c +++ b/lustre/osd-ldiskfs/osd_lproc.c @@ -403,6 +403,79 @@ ldiskfs_osd_auto_scrub_seq_write(struct file *file, const char *buffer, } LPROC_SEQ_FOPS(ldiskfs_osd_auto_scrub); +static int ldiskfs_osd_full_scrub_ratio_seq_show(struct seq_file *m, void *data) +{ + struct osd_device *dev = 
osd_dt_dev((struct dt_device *)m->private); + + LASSERT(dev != NULL); + if (unlikely(dev->od_mnt == NULL)) + return -EINPROGRESS; + + return seq_printf(m, LPU64"\n", dev->od_full_scrub_ratio); +} + +static ssize_t +ldiskfs_osd_full_scrub_ratio_seq_write(struct file *file, const char *buffer, + size_t count, loff_t *off) +{ + struct seq_file *m = file->private_data; + struct dt_device *dt = m->private; + struct osd_device *dev = osd_dt_dev(dt); + int val, rc; + + LASSERT(dev != NULL); + if (unlikely(dev->od_mnt == NULL)) + return -EINPROGRESS; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc != 0) + return rc; + + if (val < 0) + return -EINVAL; + + dev->od_full_scrub_ratio = val; + return count; +} +LPROC_SEQ_FOPS(ldiskfs_osd_full_scrub_ratio); + +static int ldiskfs_osd_full_scrub_speed_seq_show(struct seq_file *m, void *data) +{ + struct osd_device *dev = osd_dt_dev((struct dt_device *)m->private); + + LASSERT(dev != NULL); + if (unlikely(dev->od_mnt == NULL)) + return -EINPROGRESS; + + return seq_printf(m, LPU64" (bad OI mappings/minute)\n", + dev->od_full_scrub_speed); +} + +static ssize_t +ldiskfs_osd_full_scrub_speed_seq_write(struct file *file, const char *buffer, + size_t count, loff_t *off) +{ + struct seq_file *m = file->private_data; + struct dt_device *dt = m->private; + struct osd_device *dev = osd_dt_dev(dt); + int val, rc; + + LASSERT(dev != NULL); + if (unlikely(dev->od_mnt == NULL)) + return -EINPROGRESS; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc != 0) + return rc; + + if (val < 0) + return -EINVAL; + + dev->od_full_scrub_speed = val; + return count; +} +LPROC_SEQ_FOPS(ldiskfs_osd_full_scrub_speed); + static int ldiskfs_osd_track_declares_assert_seq_show(struct seq_file *m, void *data) { @@ -538,6 +611,10 @@ struct lprocfs_seq_vars lprocfs_osd_obd_vars[] = { .fops = &ldiskfs_osd_pdo_fops }, { .name = "auto_scrub", .fops = &ldiskfs_osd_auto_scrub_fops }, + { .name = "full_scrub_ratio", + .fops = 
&ldiskfs_osd_full_scrub_ratio_fops }, + { .name = "full_scrub_speed", + .fops = &ldiskfs_osd_full_scrub_speed_fops }, { .name = "oi_scrub", .fops = &ldiskfs_osd_oi_scrub_fops }, { .name = "read_cache_enable", diff --git a/lustre/osd-ldiskfs/osd_scrub.c b/lustre/osd-ldiskfs/osd_scrub.c index 709d6505..bc7bf68 100644 --- a/lustre/osd-ldiskfs/osd_scrub.c +++ b/lustre/osd-ldiskfs/osd_scrub.c @@ -285,7 +285,9 @@ void osd_scrub_file_reset(struct osd_scrub *scrub, __u8 *uuid, __u64 flags) sf->sf_items_checked = 0; sf->sf_items_updated = 0; sf->sf_items_failed = 0; - sf->sf_items_updated_prior = 0; + if (!scrub->os_in_join) + sf->sf_items_updated_prior = 0; + sf->sf_items_noscrub = 0; sf->sf_items_igif = 0; } @@ -516,7 +518,9 @@ iget: } } - scrub->os_full_speed = 1; + if (!scrub->os_partial_scan) + scrub->os_full_speed = 1; + ops = DTO_INDEX_INSERT; idx = osd_oi_fid2idx(dev, fid); switch (val) { @@ -547,7 +551,9 @@ iget: GOTO(out, rc = 0); } else { - scrub->os_full_speed = 1; + if (!scrub->os_partial_scan) + scrub->os_full_speed = 1; + sf->sf_flags |= SF_INCONSISTENT; /* XXX: If the device is restored from file-level backup, then @@ -630,14 +636,12 @@ static int osd_scrub_prep(struct osd_device *dev) down_write(&scrub->os_rwsem); if (flags & SS_SET_FAILOUT) sf->sf_param |= SP_FAILOUT; - - if (flags & SS_CLEAR_FAILOUT) + else if (flags & SS_CLEAR_FAILOUT) sf->sf_param &= ~SP_FAILOUT; - if (flags & SS_SET_DRYRUN) + if (flags & SS_SET_DRYRUN) { sf->sf_param |= SP_DRYRUN; - - if (flags & SS_CLEAR_DRYRUN && sf->sf_param & SP_DRYRUN) { + } else if (flags & SS_CLEAR_DRYRUN && sf->sf_param & SP_DRYRUN) { sf->sf_param &= ~SP_DRYRUN; drop_dryrun = true; } @@ -646,22 +650,29 @@ static int osd_scrub_prep(struct osd_device *dev) osd_scrub_file_reset(scrub, LDISKFS_SB(osd_sb(dev))->s_es->s_uuid, 0); - if (flags & SS_AUTO) { + if (flags & SS_AUTO_FULL) { scrub->os_full_speed = 1; + scrub->os_partial_scan = 0; sf->sf_flags |= SF_AUTO; - /* For the case of OI scrub auto triggered, NOT 
dryrun. */ - sf->sf_param &= ~SP_FAILOUT; + } else if (flags & SS_AUTO_PARTIAL) { + scrub->os_full_speed = 0; + scrub->os_partial_scan = 1; + sf->sf_flags |= SF_AUTO; + } else if (sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT | + SF_UPGRADE)) { + scrub->os_full_speed = 1; + scrub->os_partial_scan = 0; } else { scrub->os_full_speed = 0; + scrub->os_partial_scan = 0; } - if (sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT | SF_UPGRADE)) - scrub->os_full_speed = 1; - - scrub->os_in_prior = 0; spin_lock(&scrub->os_lock); + scrub->os_in_prior = 0; scrub->os_waiting = 0; scrub->os_paused = 0; + scrub->os_in_join = 0; + scrub->os_full_scrub = 0; spin_unlock(&scrub->os_lock); scrub->os_new_checked = 0; if (drop_dryrun && sf->sf_pos_first_inconsistent != 0) @@ -769,13 +780,13 @@ typedef int (*osd_iit_next_policy)(struct osd_thread_info *info, struct osd_device *dev, struct osd_iit_param *param, struct osd_idmap_cache **oic, - int noslot); + const bool noslot); typedef int (*osd_iit_exec_policy)(struct osd_thread_info *info, struct osd_device *dev, struct osd_iit_param *param, struct osd_idmap_cache *oic, - int *noslot, int rc); + bool *noslot, int rc); static int osd_iit_next(struct osd_iit_param *param, __u32 *pos) { @@ -942,7 +953,7 @@ put: static int osd_scrub_next(struct osd_thread_info *info, struct osd_device *dev, struct osd_iit_param *param, - struct osd_idmap_cache **oic, int noslot) + struct osd_idmap_cache **oic, const bool noslot) { struct osd_scrub *scrub = &dev->od_scrub; struct ptlrpc_thread *thread = &scrub->os_thread; @@ -983,7 +994,7 @@ static int osd_scrub_next(struct osd_thread_info *info, struct osd_device *dev, return 0; } - if (noslot != 0) + if (noslot) return SCRUB_NEXT_WAIT; rc = osd_iit_next(param, &scrub->os_pos_current); @@ -1000,7 +1011,7 @@ static int osd_scrub_next(struct osd_thread_info *info, struct osd_device *dev, static int osd_preload_next(struct osd_thread_info *info, struct osd_device *dev, struct osd_iit_param *param, - struct 
osd_idmap_cache **oic, int noslot) + struct osd_idmap_cache **oic, const bool noslot) { struct osd_otable_cache *ooc = &dev->od_otable_it->ooi_cache; struct osd_scrub *scrub; @@ -1044,7 +1055,7 @@ osd_scrub_wakeup(struct osd_scrub *scrub, struct osd_otable_it *it) static int osd_scrub_exec(struct osd_thread_info *info, struct osd_device *dev, struct osd_iit_param *param, - struct osd_idmap_cache *oic, int *noslot, int rc) + struct osd_idmap_cache *oic, bool *noslot, int rc) { struct l_wait_info lwi = { 0 }; struct osd_scrub *scrub = &dev->od_scrub; @@ -1099,7 +1110,7 @@ wait: return 0; if (ooc != NULL && osd_scrub_has_window(scrub, ooc)) { - *noslot = 0; + *noslot = false; return 0; } @@ -1108,15 +1119,15 @@ wait: &lwi); if (ooc != NULL && osd_scrub_has_window(scrub, ooc)) - *noslot = 0; + *noslot = false; else - *noslot = 1; + *noslot = true; return 0; } static int osd_preload_exec(struct osd_thread_info *info, struct osd_device *dev, struct osd_iit_param *param, - struct osd_idmap_cache *oic, int *noslot, int rc) + struct osd_idmap_cache *oic, bool *noslot, int rc) { struct osd_otable_cache *ooc = &dev->od_otable_it->ooi_cache; @@ -1131,22 +1142,170 @@ static int osd_preload_exec(struct osd_thread_info *info, #define SCRUB_IT_ALL 1 #define SCRUB_IT_CRASH 2 +static void osd_scrub_join(struct osd_device *dev, __u32 flags) +{ + struct osd_scrub *scrub = &dev->od_scrub; + struct ptlrpc_thread *thread = &scrub->os_thread; + struct scrub_file *sf = &scrub->os_file; + int rc; + ENTRY; + + LASSERT(!(flags & SS_AUTO_PARTIAL)); + + down_write(&scrub->os_rwsem); + scrub->os_in_join = 1; + if (flags & SS_SET_FAILOUT) + sf->sf_param |= SP_FAILOUT; + else if (flags & SS_CLEAR_FAILOUT) + sf->sf_param &= ~SP_FAILOUT; + + if (flags & SS_SET_DRYRUN) + sf->sf_param |= SP_DRYRUN; + else if (flags & SS_CLEAR_DRYRUN) + sf->sf_param &= ~SP_DRYRUN; + + if (flags & SS_RESET) + osd_scrub_file_reset(scrub, + LDISKFS_SB(osd_sb(dev))->s_es->s_uuid, 0); + + if (flags & SS_AUTO_FULL || + 
sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT | SF_UPGRADE)) + scrub->os_full_speed = 1; + else + scrub->os_full_speed = 0; + + scrub->os_new_checked = 0; + if (sf->sf_pos_last_checkpoint != 0) + sf->sf_pos_latest_start = sf->sf_pos_last_checkpoint + 1; + else + sf->sf_pos_latest_start = LDISKFS_FIRST_INO(osd_sb(dev)) + 1; + + scrub->os_pos_current = sf->sf_pos_latest_start; + sf->sf_time_latest_start = cfs_time_current_sec(); + sf->sf_time_last_checkpoint = sf->sf_time_latest_start; + rc = osd_scrub_file_store(scrub); + if (rc != 0) + CDEBUG(D_LFSCK, "%.16s: fail to store scrub file when join " + "the OI scrub: rc = %d\n", osd_scrub2name(scrub), rc); + + spin_lock(&scrub->os_lock); + scrub->os_waiting = 0; + scrub->os_paused = 0; + scrub->os_partial_scan = 0; + scrub->os_in_join = 0; + scrub->os_full_scrub = 0; + spin_unlock(&scrub->os_lock); + wake_up_all(&thread->t_ctl_waitq); + up_write(&scrub->os_rwsem); + + EXIT; +} + static int osd_inode_iteration(struct osd_thread_info *info, struct osd_device *dev, __u32 max, bool preload) { + struct osd_scrub *scrub = &dev->od_scrub; + struct ptlrpc_thread *thread = &scrub->os_thread; + struct scrub_file *sf = &scrub->os_file; osd_iit_next_policy next; osd_iit_exec_policy exec; __u32 *pos; __u32 *count; - struct osd_iit_param param; + struct osd_iit_param param = { 0 }; + struct l_wait_info lwi = { 0 }; __u32 limit; - int noslot = 0; int rc; + bool noslot = true; ENTRY; + param.sb = osd_sb(dev); + if (preload) + goto full; + + while (scrub->os_partial_scan && !scrub->os_in_join) { + struct osd_idmap_cache *oic = NULL; + + rc = osd_scrub_next(info, dev, ¶m, &oic, noslot); + switch (rc) { + case SCRUB_NEXT_EXIT: + RETURN(0); + case SCRUB_NEXT_CRASH: + RETURN(SCRUB_IT_CRASH); + case SCRUB_NEXT_FATAL: + RETURN(-EINVAL); + case SCRUB_NEXT_WAIT: { + struct kstatfs *ksfs = &info->oti_ksfs; + __u64 used; + + if (dev->od_full_scrub_ratio == OFSR_NEVER || + unlikely(sf->sf_items_updated_prior == 0)) + goto wait; + + if 
(dev->od_full_scrub_ratio == OFSR_DIRECTLY || + scrub->os_full_scrub) { + osd_scrub_join(dev, SS_AUTO_FULL | SS_RESET); + goto full; + } + + rc = param.sb->s_op->statfs(param.sb->s_root, ksfs); + if (rc != 0) + goto wait; + + used = ksfs->f_files - ksfs->f_ffree; + do_div(used, sf->sf_items_updated_prior); + /* If we hit too much inconsistent OI mappings during + * the partial scan, then scan the device completely. */ + if (used < dev->od_full_scrub_ratio) { + osd_scrub_join(dev, SS_AUTO_FULL | SS_RESET); + goto full; + } + +wait: + if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_DELAY) && + cfs_fail_val > 0) + continue; + + sf->sf_status = SS_COMPLETED; + sf->sf_flags &= ~(SF_RECREATED | SF_INCONSISTENT | + SF_UPGRADE | SF_AUTO); + l_wait_event(thread->t_ctl_waitq, + !thread_is_running(thread) || + !scrub->os_partial_scan || + scrub->os_in_join || + !list_empty(&scrub->os_inconsistent_items), + &lwi); + + if (unlikely(!thread_is_running(thread))) + RETURN(0); + + if (!scrub->os_partial_scan || scrub->os_in_join) + goto full; + + continue; + } + default: + LASSERTF(rc == 0, "rc = %d\n", rc); + + sf->sf_status = SS_SCANNING; + sf->sf_flags |= SF_AUTO; + osd_scrub_exec(info, dev, ¶m, oic, &noslot, rc); + break; + } + } + +full: if (!preload) { - struct osd_scrub *scrub = &dev->od_scrub; + sf->sf_status = SS_SCANNING; + l_wait_event(thread->t_ctl_waitq, + !thread_is_running(thread) || !scrub->os_in_join, + &lwi); + + if (unlikely(!thread_is_running(thread))) + RETURN(0); + } + noslot = false; + if (!preload) { next = osd_scrub_next; exec = osd_scrub_exec; pos = &scrub->os_pos_current; @@ -1159,7 +1318,6 @@ static int osd_inode_iteration(struct osd_thread_info *info, pos = &ooc->ooc_pos_preload; count = &ooc->ooc_cached_items; } - param.sb = osd_sb(dev); limit = le32_to_cpu(LDISKFS_SB(param.sb)->s_es->s_inodes_count); while (*pos <= limit && *count < max) { @@ -1186,8 +1344,7 @@ static int osd_inode_iteration(struct osd_thread_info *info, if (param.bitmap == NULL) { 
CDEBUG(D_LFSCK, "%.16s: fail to read bitmap for %u, " "scrub will stop, urgent mode\n", - LDISKFS_SB(param.sb)->s_es->s_volume_name, - (__u32)param.bg); + osd_scrub2name(scrub), (__u32)param.bg); RETURN(-EIO); } @@ -1276,7 +1433,7 @@ static int osd_scrub_main(void *args) GOTO(out, rc); } - if (!scrub->os_full_speed) { + if (!scrub->os_full_speed && !scrub->os_partial_scan) { struct l_wait_info lwi = { 0 }; struct osd_otable_it *it = dev->od_otable_it; struct osd_otable_cache *ooc = &it->ooi_cache; @@ -2035,17 +2192,30 @@ static int do_osd_scrub_start(struct osd_device *dev, __u32 flags) int rc; ENTRY; -again: /* os_lock: sync status between stop and scrub thread */ spin_lock(&scrub->os_lock); + +again: if (thread_is_running(thread)) { spin_unlock(&scrub->os_lock); - RETURN(-EALREADY); - } else if (unlikely(thread_is_stopping(thread))) { + if (!scrub->os_partial_scan || flags & SS_AUTO_PARTIAL) + RETURN(-EALREADY); + + osd_scrub_join(dev, flags); + spin_lock(&scrub->os_lock); + if (!thread_is_running(thread)) + goto again; + + spin_unlock(&scrub->os_lock); + RETURN(0); + } + + if (unlikely(thread_is_stopping(thread))) { spin_unlock(&scrub->os_lock); l_wait_event(thread->t_ctl_waitq, thread_is_stopped(thread), &lwi); + spin_lock(&scrub->os_lock); goto again; } spin_unlock(&scrub->os_lock); @@ -2077,14 +2247,14 @@ again: RETURN(0); } -int osd_scrub_start(struct osd_device *dev) +int osd_scrub_start(struct osd_device *dev, __u32 flags) { int rc; ENTRY; /* od_otable_mutex: prevent curcurrent start/stop */ mutex_lock(&dev->od_otable_mutex); - rc = do_osd_scrub_start(dev, SS_AUTO); + rc = do_osd_scrub_start(dev, flags); mutex_unlock(&dev->od_otable_mutex); RETURN(rc == -EALREADY ? 
0 : rc); @@ -2252,7 +2422,7 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev) (sf->sf_status == SS_INIT && sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT | SF_UPGRADE)))) - rc = osd_scrub_start(dev); + rc = osd_scrub_start(dev, SS_AUTO_FULL); if (rc != 0) GOTO(cleanup_oi, rc); @@ -2336,7 +2506,7 @@ static struct dt_it *osd_otable_it_init(const struct lu_env *env, start |= SS_CLEAR_DRYRUN; } - rc = do_osd_scrub_start(dev, start); + rc = do_osd_scrub_start(dev, start & ~SS_AUTO_PARTIAL); if (rc < 0 && rc != -EALREADY) { dev->od_otable_it = NULL; OBD_FREE_PTR(it); @@ -2546,6 +2716,8 @@ const struct dt_index_operations osd_otable_ops = { /* high priority inconsistent items list APIs */ +#define SCRUB_BAD_OIMAP_DECAY_INTERVAL 60 + int osd_oii_insert(struct osd_device *dev, struct osd_idmap_cache *oic, int insert) { @@ -2563,6 +2735,25 @@ int osd_oii_insert(struct osd_device *dev, struct osd_idmap_cache *oic, oii->oii_cache = *oic; oii->oii_insert = insert; + if (scrub->os_partial_scan) { + __u64 now = cfs_time_current_sec(); + + /* If there haven't been errors in a long time, + * decay old count until either the errors are + * gone or we reach the current interval. */ + while (unlikely(scrub->os_bad_oimap_count > 0 && + scrub->os_bad_oimap_time + + SCRUB_BAD_OIMAP_DECAY_INTERVAL < now)) { + scrub->os_bad_oimap_count >>= 1; + scrub->os_bad_oimap_time += + SCRUB_BAD_OIMAP_DECAY_INTERVAL; + } + + scrub->os_bad_oimap_time = now; + if (++scrub->os_bad_oimap_count > dev->od_full_scrub_speed) + scrub->os_full_scrub = 1; + } + spin_lock(&scrub->os_lock); if (unlikely(!thread_is_running(thread))) { spin_unlock(&scrub->os_lock); diff --git a/lustre/osd-ldiskfs/osd_scrub.h b/lustre/osd-ldiskfs/osd_scrub.h index f046e33..d4d67d4 100644 --- a/lustre/osd-ldiskfs/osd_scrub.h +++ b/lustre/osd-ldiskfs/osd_scrub.h @@ -96,14 +96,17 @@ enum scrub_start { /* Reset scrub start position. */ SS_RESET = 0x00000004, - /* Trigger scrub automatically. 
*/ - SS_AUTO = 0x00000008, + /* Trigger full scrub automatically. */ + SS_AUTO_FULL = 0x00000008, + + /* Trigger partial scrub automatically. */ + SS_AUTO_PARTIAL = 0x00000010, /* Set dryrun flag. */ - SS_SET_DRYRUN = 0x00000010, + SS_SET_DRYRUN = 0x00000020, /* Clear dryrun flag. */ - SS_CLEAR_DRYRUN = 0x00000020, + SS_CLEAR_DRYRUN = 0x00000040, }; /* The flags here are only used inside OSD, NOT be visible by dump(). */ @@ -228,7 +231,12 @@ struct osd_scrub { os_waiting:1, /* Waiting for scan window. */ os_full_speed:1, /* run w/o speed limit */ os_paused:1, /* The scrub is paused. */ - os_convert_igif:1; + os_convert_igif:1, + os_partial_scan:1, + os_in_join:1, + os_full_scrub:1; + __u64 os_bad_oimap_count; + __u64 os_bad_oimap_time; }; #endif /* _OSD_SCRUB_H */ diff --git a/lustre/tests/sanity-scrub.sh b/lustre/tests/sanity-scrub.sh index c612cdb..860e082 100644 --- a/lustre/tests/sanity-scrub.sh +++ b/lustre/tests/sanity-scrub.sh @@ -49,6 +49,9 @@ setupall [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.3.90) ]] && ALWAYS_EXCEPT="$ALWAYS_EXCEPT 1a" +[[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] && + ALWAYS_EXCEPT="$ALWAYS_EXCEPT 4" + [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.4.1) ]] && ALWAYS_EXCEPT="$ALWAYS_EXCEPT 15" @@ -242,6 +245,18 @@ scrub_check_data() { done } +scrub_check_data2() { + local filename=$1 + local error_id=$2 + local n + + for n in $(seq $MDSCOUNT); do + diff -q $LUSTRE/tests/$filename \ + $DIR/$tdir/mds$n/$filename || + error "($error_id) File data check failed" + done +} + scrub_remove_ois() { local error_id=$1 local index=$2 @@ -273,6 +288,29 @@ scrub_enable_auto() { done } +full_scrub_ratio() { + [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.6.50) ]] && + return + + local ratio=$1 + local n + + for n in $(seq $MDSCOUNT); do + do_facet mds$n $LCTL set_param -n \ + osd-ldiskfs.$(facet_svc mds$n).full_scrub_ratio $ratio + done +} + +full_scrub_speed() { + local speed=$1 + local n + 
+ for n in $(seq $MDSCOUNT); do + do_facet mds$n $LCTL set_param -n \ + osd-ldiskfs.$(facet_svc mds$n).full_scrub_speed $speed + done +} + test_0() { scrub_prep 0 echo "starting MDTs without disabling OI scrub" @@ -370,7 +408,7 @@ test_3() { } #run_test 3 "Do not trigger OI scrub when MDT mounts if 'noscrub' specified" -test_4() { +test_4a() { scrub_prep 0 scrub_backup_restore 1 echo "starting MDTs with OI scrub disabled" @@ -378,11 +416,163 @@ test_4() { scrub_check_flags 4 inconsistent mount_client $MOUNT || error "(5) Fail to start client!" scrub_enable_auto + full_scrub_ratio 0 scrub_check_data 6 + sleep 3 + scrub_check_status 7 completed scrub_check_flags 8 "" + + local -a updated0 + for n in $(seq $MDSCOUNT); do + updated0[$n]=$(scrub_status $n | + awk '/^sf_items_updated_prior/ { print $2 }') + done + + scrub_check_data2 sanity-scrub.sh 9 + sleep 3 + + local -a updated1 + for n in $(seq $MDSCOUNT); do + updated1[$n]=$(scrub_status $n | + awk '/^sf_items_updated_prior/ { print $2 }') + [ ${updated0[$n]} -eq ${updated1[$n]} ] || + error "(10) NOT auto trigger full scrub as expected" + done } -run_test 4 "Trigger OI scrub automatically if inconsistent OI mapping was found" +run_test 4a "Auto trigger OI scrub if bad OI mapping was found (1)" + +test_4b() { + scrub_prep 5 + scrub_backup_restore 1 + echo "starting MDTs with OI scrub disabled" + scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" + scrub_check_flags 4 inconsistent + mount_client $MOUNT || error "(5) Fail to start client!" 
+ scrub_enable_auto + full_scrub_ratio 10 + full_scrub_speed 10000 + scrub_check_data 6 + sleep 3 + + scrub_check_status 7 completed + scrub_check_flags 8 "" + + local -a updated0 + for n in $(seq $MDSCOUNT); do + updated0[$n]=$(scrub_status $n | + awk '/^sf_items_updated_prior/ { print $2 }') + done + + scrub_check_data2 sanity-scrub.sh 9 + sleep 3 + + scrub_check_status 10 completed + scrub_check_flags 11 "" + + local -a updated1 + for n in $(seq $MDSCOUNT); do + updated1[$n]=$(scrub_status $n | + awk '/^sf_items_updated_prior/ { print $2 }') + [ ${updated0[$n]} -lt ${updated1[$n]} ] || + error "(12) Auto trigger full scrub unexpectedly" + done + + for n in $(seq $MDSCOUNT); do + ls -l $DIR/$tdir/mds$n/*.sh > /dev/null || + error "(13) fail to ls" + done + sleep 3 + + scrub_check_status 14 completed + scrub_check_flags 15 "" + + for n in $(seq $MDSCOUNT); do + updated0[$n]=$(scrub_status $n | + awk '/^sf_items_updated_prior/ { print $2 }') + [ ${updated0[$n]} -gt ${updated1[$n]} ] || + error "(16) Auto trigger full scrub unexpectedly" + done + + for n in $(seq $MDSCOUNT); do + ls -l $DIR/$tdir/mds$n/${tfile}1 || error "(17) fail to ls" + done + sleep 3 + + for n in $(seq $MDSCOUNT); do + updated1[$n]=$(scrub_status $n | + awk '/^sf_items_updated_prior/ { print $2 }') + [ ${updated0[$n]} -eq ${updated1[$n]} ] || + error "(18) NOT auto trigger full scrub as expected" + done +} +run_test 4b "Auto trigger OI scrub if bad OI mapping was found (2)" + +test_4c() { + scrub_prep 500 + scrub_backup_restore 1 + echo "starting MDTs with OI scrub disabled" + scrub_start_mds 2 "$MOUNT_OPTS_NOSCRUB" + scrub_check_flags 4 inconsistent + mount_client $MOUNT || error "(5) Fail to start client!" 
+ scrub_enable_auto + full_scrub_ratio 2 + full_scrub_speed 20 + scrub_check_data 6 + sleep 3 + + scrub_check_status 7 completed + scrub_check_flags 8 "" + + local -a updated0 + for n in $(seq $MDSCOUNT); do + updated0[$n]=$(scrub_status $n | + awk '/^sf_items_updated_prior/ { print $2 }') + done + + scrub_check_data2 sanity-scrub.sh 9 + sleep 3 + + scrub_check_status 10 completed + scrub_check_flags 11 "" + + local -a updated1 + for n in $(seq $MDSCOUNT); do + updated1[$n]=$(scrub_status $n | + awk '/^sf_items_updated_prior/ { print $2 }') + [ ${updated0[$n]} -lt ${updated1[$n]} ] || + error "(12) Auto trigger full scrub unexpectedly" + done + + for n in $(seq $MDSCOUNT); do + ls -l $DIR/$tdir/mds$n/*.sh > /dev/null || + error "(13) fail to ls" + done + sleep 3 + + scrub_check_status 14 completed + scrub_check_flags 15 "" + + for n in $(seq $MDSCOUNT); do + updated0[$n]=$(scrub_status $n | + awk '/^sf_items_updated_prior/ { print $2 }') + [ ${updated0[$n]} -gt ${updated1[$n]} ] || + error "(16) Auto trigger full scrub unexpectedly" + done + + for n in $(seq $MDSCOUNT); do + ls -l $DIR/$tdir/mds$n/${tfile}1 || error "(17) fail to ls" + done + sleep 3 + + for n in $(seq $MDSCOUNT); do + updated1[$n]=$(scrub_status $n | + awk '/^sf_items_updated_prior/ { print $2 }') + [ ${updated0[$n]} -eq ${updated1[$n]} ] || + error "(18) NOT auto trigger full scrub as expected" + done +} +run_test 4c "Auto trigger OI scrub if bad OI mapping was found (3)" test_5() { formatall > /dev/null @@ -401,6 +591,7 @@ test_5() { do_nodes $(comma_list $(mdts_nodes)) \ $LCTL set_param fail_val=3 fail_loc=0x190 + full_scrub_ratio 0 scrub_check_data 6 umount_client $MOUNT || error "(7) Fail to stop client!" 
scrub_check_status 8 scanning @@ -464,8 +655,9 @@ test_6() { #define OBD_FAIL_OSD_SCRUB_DELAY 0x190 do_nodes $(comma_list $(mdts_nodes)) \ - $LCTL set_param fail_val=3 fail_loc=0x190 + $LCTL set_param fail_val=2 fail_loc=0x190 + full_scrub_ratio 0 scrub_check_data 6 # Sleep 5 sec to guarantee at least one object processed by OI scrub @@ -543,6 +735,7 @@ test_7() { do_nodes $(comma_list $(mdts_nodes)) \ $LCTL set_param fail_val=3 fail_loc=0x190 + full_scrub_ratio 0 scrub_check_data 6 local n @@ -681,6 +874,7 @@ test_10a() { do_nodes $(comma_list $(mdts_nodes)) \ $LCTL set_param fail_val=1 fail_loc=0x190 + full_scrub_ratio 0 scrub_check_data 6 scrub_check_status 7 scanning umount_client $MOUNT || error "(8) Fail to stop client!"