From 3ff1c932f869c8535aaebc56f0c924a760d1d601 Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Wed, 29 Nov 2017 10:53:36 +0800 Subject: [PATCH] LU-7585 osd-ldiskfs: auto scrub control Originally, there was an lproc interface in OSD, named "auto_scrub", used to control auto detecting inconsistent OI mappings and triggering OI scrub. But such a switch is too simple, either 'on' or 'off'. It is not convenient for real system control. For example: If we have just finished one cycle of OI scrub, should we auto detect OI inconsistency in the subsequent lookup()? If yes, it will cause some unnecessary overhead. But if no, then as long as there was once an OI scrub, we will have no more chance to auto detect corrupted OI mappings. To resolve this trouble, this patch enhances the lproc interface "auto_scrub" to allow the system administrator to specify how long (in seconds) the system can be trusted after the latest OI scrub. During such a trusted interval, we will not auto detect inconsistent OI mappings. The default value is one month (60 * 60 * 24 * 30). The patch also replaces cfs_time_current_sec() with ktime_get_real_seconds() for some cleanup work. 
Signed-off-by: Fan Yong Change-Id: Iae2c7dd1da92c27d40357c62cd94e886228c86f7 Reviewed-on: https://review.whamcloud.com/29710 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Lai Siyao Reviewed-by: Alex Zhuravlev Reviewed-by: Oleg Drokin --- lustre/include/lustre_scrub.h | 19 +++++++++++++++++++ lustre/osd-ldiskfs/osd_handler.c | 15 ++++++++++----- lustre/osd-ldiskfs/osd_internal.h | 2 +- lustre/osd-ldiskfs/osd_lproc.c | 4 ++-- lustre/osd-ldiskfs/osd_scrub.c | 11 ++++++----- 5 files changed, 38 insertions(+), 13 deletions(-) diff --git a/lustre/include/lustre_scrub.h b/lustre/include/lustre_scrub.h index e858586..3c4561c 100644 --- a/lustre/include/lustre_scrub.h +++ b/lustre/include/lustre_scrub.h @@ -163,6 +163,25 @@ enum osd_lf_flags { OLF_IDX_IN_FID = 0x0010, }; +/* There is some overhead to detect OI inconsistency automatically + * during normal RPC handling. We do not want to always auto detect + * OI inconsistency, especially when OI scrub was just done recently. + * + * The 'auto_scrub' defines the time interval (in seconds) to + * enable auto detect OI inconsistency since last OI scrub done. */ +enum auto_scrub { + /* Disable auto scrub. */ + AS_NEVER = 0, + + /* 1 second is too short an interval, it is almost equal to always auto + * detect inconsistent OI, usually used for test. */ + AS_ALWAYS = 1, + + /* Enable auto detect OI inconsistency one month (60 * 60 * 24 * 30) + * after last OI scrub. */ + AS_DEFAULT = 2592000LL, +}; + struct scrub_file { /* 128-bit uuid for volume. 
*/ __u8 sf_uuid[16]; diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index 84edec75..5a5374c 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -1121,7 +1121,7 @@ trigger: goto found; } - if (dev->od_noscrub) { + if (dev->od_auto_scrub_interval == AS_NEVER) { if (!remote) GOTO(out, result = -EREMCHG); @@ -5011,10 +5011,14 @@ osd_consistency_check(struct osd_thread_info *oti, struct osd_device *dev, if (!fid_is_norm(fid) && !fid_is_igif(fid)) RETURN(0); - if (dev->od_noscrub && !thread_is_running(&scrub->os_thread)) + if (thread_is_running(&scrub->os_thread) && + scrub->os_pos_current > id->oii_ino) RETURN(0); - if (scrub->os_pos_current > id->oii_ino) + if (dev->od_auto_scrub_interval == AS_NEVER || + cfs_time_before(ktime_get_real_seconds(), + scrub->os_file.sf_time_last_complete + + dev->od_auto_scrub_interval)) RETURN(0); again: @@ -5075,7 +5079,7 @@ trigger: GOTO(out, rc); } - if (!dev->od_noscrub && ++once == 1) { + if (dev->od_auto_scrub_interval != AS_NEVER && ++once == 1) { rc = osd_scrub_start(oti->oti_env, dev, SS_AUTO_PARTIAL | SS_CLEAR_DRYRUN | SS_CLEAR_FAILOUT); CDEBUG(D_LFSCK | D_CONSOLE | D_WARNING, @@ -7061,7 +7065,7 @@ static int osd_mount(const struct lu_env *env, } if (lmd_flags & LMD_FLG_NOSCRUB) - o->od_noscrub = 1; + o->od_auto_scrub_interval = AS_NEVER; GOTO(out, rc = 0); @@ -7116,6 +7120,7 @@ static int osd_device_init0(const struct lu_env *env, o->od_read_cache = 1; o->od_writethrough_cache = 1; o->od_readcache_max_filesize = OSD_MAX_CACHE_SIZE; + o->od_auto_scrub_interval = AS_DEFAULT; cplen = strlcpy(o->od_svname, lustre_cfg_string(cfg, 4), sizeof(o->od_svname)); diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index 768d905..c794bcf 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -255,12 +255,12 @@ struct osd_device { */ unsigned int od_fl_capa:1, od_maybe_new:1, - od_noscrub:1, od_igif_inoi:1, 
od_check_ff:1, od_is_ost:1, od_index_in_idif:1; + __s64 od_auto_scrub_interval; __u32 od_dirent_journal; int od_index; struct proc_dir_entry *od_proc_entry; diff --git a/lustre/osd-ldiskfs/osd_lproc.c b/lustre/osd-ldiskfs/osd_lproc.c index bbd5779..6d905fe 100644 --- a/lustre/osd-ldiskfs/osd_lproc.c +++ b/lustre/osd-ldiskfs/osd_lproc.c @@ -383,7 +383,7 @@ static int ldiskfs_osd_auto_scrub_seq_show(struct seq_file *m, void *data) if (unlikely(dev->od_mnt == NULL)) return -EINPROGRESS; - seq_printf(m, "%d\n", !dev->od_noscrub); + seq_printf(m, "%lld\n", dev->od_auto_scrub_interval); return 0; } @@ -405,7 +405,7 @@ ldiskfs_osd_auto_scrub_seq_write(struct file *file, const char __user *buffer, if (rc) return rc; - dev->od_noscrub = !val; + dev->od_auto_scrub_interval = val; return count; } LPROC_SEQ_FOPS(ldiskfs_osd_auto_scrub); diff --git a/lustre/osd-ldiskfs/osd_scrub.c b/lustre/osd-ldiskfs/osd_scrub.c index 358edc8..82edd4a 100644 --- a/lustre/osd-ldiskfs/osd_scrub.c +++ b/lustre/osd-ldiskfs/osd_scrub.c @@ -509,7 +509,7 @@ static int osd_scrub_prep(const struct lu_env *env, struct osd_device *dev) scrub->os_pos_current = sf->sf_pos_latest_start; sf->sf_status = SS_SCANNING; - sf->sf_time_latest_start = cfs_time_current_sec(); + sf->sf_time_latest_start = ktime_get_real_seconds(); sf->sf_time_last_checkpoint = sf->sf_time_latest_start; sf->sf_pos_last_checkpoint = sf->sf_pos_latest_start - 1; rc = scrub_file_store(env, scrub); @@ -544,7 +544,7 @@ static int osd_scrub_post(const struct lu_env *env, struct osd_device *dev, scrub->os_new_checked = 0; sf->sf_pos_last_checkpoint = scrub->os_pos_current; } - sf->sf_time_last_checkpoint = cfs_time_current_sec(); + sf->sf_time_last_checkpoint = ktime_get_real_seconds(); if (result > 0) { dev->od_igif_inoi = 1; dev->od_check_ff = 0; @@ -1003,7 +1003,7 @@ static void osd_scrub_join(const struct lu_env *env, struct osd_device *dev, sf->sf_pos_latest_start = LDISKFS_FIRST_INO(osd_sb(dev)) + 1; scrub->os_pos_current = 
sf->sf_pos_latest_start; - sf->sf_time_latest_start = cfs_time_current_sec(); + sf->sf_time_latest_start = ktime_get_real_seconds(); sf->sf_time_last_checkpoint = sf->sf_time_latest_start; sf->sf_pos_last_checkpoint = sf->sf_pos_latest_start - 1; rc = scrub_file_store(env, scrub); @@ -2545,7 +2545,8 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev) * later if found that the system is upgrading. */ dev->od_igif_inoi = 1; - if (!dev->od_dt_dev.dd_rdonly && !dev->od_noscrub && + if (!dev->od_dt_dev.dd_rdonly && + dev->od_auto_scrub_interval != AS_NEVER && ((sf->sf_status == SS_PAUSED) || (sf->sf_status == SS_CRASHED && sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT | @@ -2885,7 +2886,7 @@ int osd_oii_insert(struct osd_device *dev, struct osd_idmap_cache *oic, oii->oii_insert = insert; if (lscrub->os_partial_scan) { - __u64 now = cfs_time_current_sec(); + __u64 now = ktime_get_real_seconds(); /* If there haven't been errors in a long time, * decay old count until either the errors are -- 1.8.3.1