From f4ea7b630b8adc9856ee67c6d16549f36e14efd1 Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Sat, 12 Jan 2013 08:41:01 +0800 Subject: [PATCH] LU-1866 osd: ancillary work for initial OI scrub 1) Abstract some general interfaces that can be shared by common OSD operations, by normal OI scrub, and by initial OI scrub. 2) Mark the object as "LDISKFS_STATE_LUSTRE_NO_OI" if it is unnecessary to add an OI mapping for the object, such as the backend local root, OI directories/files, and others created before the OI files are created. 3) Do not check the FLD in OI scrub, since it may not be ready yet. 4) Reorganize OI scrub code to be more readable. Signed-off-by: Fan Yong Change-Id: Ife8168df2454437ad18550b73d26f7372c21776f Reviewed-on: http://review.whamcloud.com/4902 Reviewed-by: Alex Zhuravlev Tested-by: Hudson Reviewed-by: Andreas Dilger Tested-by: Maloo --- lustre/osd-ldiskfs/osd_compat.c | 4 + lustre/osd-ldiskfs/osd_handler.c | 111 ++++++------ lustre/osd-ldiskfs/osd_internal.h | 23 ++- lustre/osd-ldiskfs/osd_oi.c | 23 +-- lustre/osd-ldiskfs/osd_oi.h | 3 +- lustre/osd-ldiskfs/osd_quota.c | 4 - lustre/osd-ldiskfs/osd_scrub.c | 372 ++++++++++++++++++++++---------------- lustre/osd-ldiskfs/osd_scrub.h | 2 +- 8 files changed, 301 insertions(+), 241 deletions(-) diff --git a/lustre/osd-ldiskfs/osd_compat.c b/lustre/osd-ldiskfs/osd_compat.c index e1a04db..3d154af 100644 --- a/lustre/osd-ldiskfs/osd_compat.c +++ b/lustre/osd-ldiskfs/osd_compat.c @@ -158,6 +158,7 @@ int osd_ost_init(struct osd_device *dev) if (IS_ERR(d)) GOTO(cleanup, rc = PTR_ERR(d)); + ldiskfs_set_inode_state(d->d_inode, LDISKFS_STATE_LUSTRE_NO_OI); dev->od_ost_map->om_root = d; cleanup: @@ -373,6 +374,7 @@ static int osd_seq_load_locked(struct osd_device *osd, else if (seq_dir->d_inode == NULL) GOTO(out_put, rc = -EFAULT); + ldiskfs_set_inode_state(seq_dir->d_inode, LDISKFS_STATE_LUSTRE_NO_OI); osd_seq->oos_root = seq_dir; LASSERT(osd_seq->oos_dirs == NULL); @@ -391,6 +393,8 @@ static int osd_seq_load_locked(struct 
osd_device *osd, if (IS_ERR(dir)) { rc = PTR_ERR(dir); } else if (dir->d_inode) { + ldiskfs_set_inode_state(dir->d_inode, + LDISKFS_STATE_LUSTRE_NO_OI); osd_seq->oos_dirs[i] = dir; rc = 0; } else { diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index 495f4ba..b1edd8f 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -170,14 +170,20 @@ static struct lu_object *osd_object_alloc(const struct lu_env *env, } } -static int osd_get_lma(struct osd_thread_info *info, struct inode *inode, - struct dentry *dentry, struct lustre_mdt_attrs *lma) +static inline int __osd_xattr_get(struct inode *inode, struct dentry *dentry, + const char *name, void *buf, int len) +{ + dentry->d_inode = inode; + return inode->i_op->getxattr(dentry, name, buf, len); +} + +int osd_get_lma(struct osd_thread_info *info, struct inode *inode, + struct dentry *dentry, struct lustre_mdt_attrs *lma) { int rc; - dentry->d_inode = inode; - rc = inode->i_op->getxattr(dentry, XATTR_NAME_LMA, (void *)lma, - sizeof(*lma)); + rc = __osd_xattr_get(inode, dentry, XATTR_NAME_LMA, (void *)lma, + sizeof(*lma)); if (rc == -ERANGE) { /* try with old lma size */ rc = inode->i_op->getxattr(dentry, XATTR_NAME_LMA, @@ -252,8 +258,9 @@ struct inode *osd_iget(struct osd_thread_info *info, struct osd_device *dev, return inode; } -struct inode *osd_iget_fid(struct osd_thread_info *info, struct osd_device *dev, - struct osd_inode_id *id, struct lu_fid *fid) +static struct inode * +osd_iget_fid(struct osd_thread_info *info, struct osd_device *dev, + struct osd_inode_id *id, struct lu_fid *fid) { struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs; struct inode *inode; @@ -367,7 +374,7 @@ static int osd_fid_lookup(const struct lu_env *env, struct osd_object *obj, GOTO(out, result = 0); /* Search order: 3. OI files. 
*/ - result = osd_oi_lookup(info, dev, fid, id); + result = osd_oi_lookup(info, dev, fid, id, true); if (result == -ENOENT) { if (!fid_is_norm(fid) || !ldiskfs_test_bit(osd_oi_fid2idx(dev,fid), @@ -1035,19 +1042,6 @@ static void osd_conf_get(const struct lu_env *env, } -/** - * Helper function to get and fill the buffer with input values. - */ -static struct lu_buf *osd_buf_get(const struct lu_env *env, void *area, ssize_t len) -{ - struct lu_buf *buf; - - buf = &osd_oti_get(env)->oti_buf; - buf->lb_buf = area; - buf->lb_len = len; - return buf; -} - /* * Concurrency: shouldn't matter. */ @@ -1695,7 +1689,10 @@ static int osd_mkfile(struct osd_thread_info *info, struct osd_object *obj, * NB: don't need any lock because no contention at this * early stage */ inode->i_flags |= S_NOCMTIME; - inode->i_state |= I_LUSTRE_NOSCRUB; + + /* For new created object, it must be consistent, + * and it is unnecessary to scrub against it. */ + ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB); obj->oo_inode = inode; result = 0; } else { @@ -2167,33 +2164,15 @@ static int osd_object_destroy(const struct lu_env *env, RETURN(0); } -/** - * Helper function for osd_xattr_set() - */ -static int __osd_xattr_set(const struct lu_env *env, struct dt_object *dt, - const struct lu_buf *buf, const char *name, int fl) -{ - struct osd_object *obj = osd_dt_obj(dt); - struct inode *inode = obj->oo_inode; - struct osd_thread_info *info = osd_oti_get(env); - struct dentry *dentry = &info->oti_child_dentry; - int fs_flags = 0; - int rc; - - LASSERT(dt_object_exists(dt)); - LASSERT(inode->i_op != NULL && inode->i_op->setxattr != NULL); - - if (fl & LU_XATTR_REPLACE) - fs_flags |= XATTR_REPLACE; - - if (fl & LU_XATTR_CREATE) - fs_flags |= XATTR_CREATE; +static inline int __osd_xattr_set(struct osd_thread_info *info, + struct inode *inode, const char *name, + const void *buf, int buflen, int fl) +{ + struct dentry *dentry = &info->oti_child_dentry; ll_vfs_dq_init(inode); - dentry->d_inode = 
inode; - rc = inode->i_op->setxattr(dentry, name, buf->lb_buf, - buf->lb_len, fs_flags); - return rc; + dentry->d_inode = inode; + return inode->i_op->setxattr(dentry, name, buf, buflen, fl); } /** @@ -2209,15 +2188,17 @@ static int __osd_xattr_set(const struct lu_env *env, struct dt_object *dt, static int osd_ea_fid_set(const struct lu_env *env, struct dt_object *dt, const struct lu_fid *fid) { - struct osd_thread_info *info = osd_oti_get(env); - struct lustre_mdt_attrs *mdt_attrs = &info->oti_mdt_attrs; + struct osd_thread_info *info = osd_oti_get(env); + struct inode *inode = osd_dt_obj(dt)->oo_inode; + struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs; + int rc; - lustre_lma_init(mdt_attrs, fid); - lustre_lma_swab(mdt_attrs); - return __osd_xattr_set(env, dt, - osd_buf_get(env, mdt_attrs, sizeof *mdt_attrs), - XATTR_NAME_LMA, LU_XATTR_CREATE); + lustre_lma_init(lma, fid); + lustre_lma_swab(lma); + rc = __osd_xattr_set(info, inode, XATTR_NAME_LMA, lma, sizeof(*lma), + XATTR_CREATE); + return rc; } /** @@ -2471,8 +2452,7 @@ static int osd_xattr_get(const struct lu_env *env, struct dt_object *dt, if (osd_object_auth(env, dt, capa, CAPA_OPC_META_READ)) return -EACCES; - dentry->d_inode = inode; - return inode->i_op->getxattr(dentry, name, buf->lb_buf, buf->lb_len); + return __osd_xattr_get(inode, dentry, name, buf->lb_buf, buf->lb_len); } @@ -2521,6 +2501,11 @@ static int osd_xattr_set(const struct lu_env *env, struct dt_object *dt, const struct lu_buf *buf, const char *name, int fl, struct thandle *handle, struct lustre_capa *capa) { + struct osd_object *obj = osd_dt_obj(dt); + struct inode *inode = obj->oo_inode; + struct osd_thread_info *info = osd_oti_get(env); + int fs_flags = 0; + LASSERT(handle != NULL); /* version set is not real XATTR */ @@ -2536,7 +2521,14 @@ static int osd_xattr_set(const struct lu_env *env, struct dt_object *dt, return -EACCES; osd_trans_exec_op(env, handle, OSD_OT_XATTR_SET); - return __osd_xattr_set(env, dt, buf, name, fl); + if (fl 
& LU_XATTR_REPLACE) + fs_flags |= XATTR_REPLACE; + + if (fl & LU_XATTR_CREATE) + fs_flags |= XATTR_CREATE; + + return __osd_xattr_set(info, inode, name, buf->lb_buf, buf->lb_len, + fs_flags); } /* @@ -2594,7 +2586,6 @@ static int osd_xattr_del(const struct lu_env *env, struct dt_object *dt, LASSERT(dt_object_exists(dt)); LASSERT(inode->i_op != NULL && inode->i_op->removexattr != NULL); - LASSERT(osd_write_locked(env, obj)); LASSERT(handle != NULL); if (osd_object_auth(env, dt, capa, CAPA_OPC_META_WRITE)) @@ -3426,7 +3417,7 @@ osd_consistency_check(struct osd_thread_info *oti, struct osd_device *dev, RETURN_EXIT; again: - rc = osd_oi_lookup(oti, dev, fid, id); + rc = osd_oi_lookup(oti, dev, fid, id, true); if (rc != 0 && rc != -ENOENT) RETURN_EXIT; @@ -4552,6 +4543,8 @@ static int osd_mount(const struct lu_env *env, GOTO(out, rc = -EINVAL); } + ldiskfs_set_inode_state(osd_sb(o)->s_root->d_inode, + LDISKFS_STATE_LUSTRE_NO_OI); if (lmd_flags & LMD_FLG_NOSCRUB) o->od_noscrub = 1; diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index cce3b12..f28af74 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -83,12 +83,24 @@ struct inode; #define OSD_COUNTERS (0) -/* Lustre special inode::i_state to indicate OI scrub skip this inode. */ -#define I_LUSTRE_NOSCRUB (1 << 31) +/* ldiskfs special inode::i_state_flags need to be accessed with + * ldiskfs_{set,clear,test}_inode_state() only */ + +/* OI scrub should skip this inode. */ +#define LDISKFS_STATE_LUSTRE_NOSCRUB 31 + +/* Do not add OI mapping for this inode. 
*/ +#define LDISKFS_STATE_LUSTRE_NO_OI 30 /** Enable thandle usage statistics */ #define OSD_THANDLE_STATS (0) +#define MAX_OBJID_GROUP (FID_SEQ_ECHO + 1) + +#define OBJECTS "OBJECTS" +#define ADMIN_USR "admin_quotafile_v2.usr" +#define ADMIN_GRP "admin_quotafile_v2.grp" + struct osd_directory { struct iam_container od_container; struct iam_descr od_descr; @@ -244,7 +256,8 @@ struct osd_device { struct obd_statfs od_statfs; spinlock_t od_osfs_lock; - unsigned int od_noscrub:1; + unsigned int od_noscrub:1, + od_handle_nolma:1; struct fsfilt_operations *od_fsops; int od_connects; @@ -594,8 +607,8 @@ int osd_object_auth(const struct lu_env *env, struct dt_object *dt, struct lustre_capa *capa, __u64 opc); struct inode *osd_iget(struct osd_thread_info *info, struct osd_device *dev, struct osd_inode_id *id); -struct inode *osd_iget_fid(struct osd_thread_info *info, struct osd_device *dev, - struct osd_inode_id *id, struct lu_fid *fid); +int osd_get_lma(struct osd_thread_info *info, struct inode *inode, + struct dentry *dentry, struct lustre_mdt_attrs *lma); int osd_obj_map_init(struct osd_device *osd); void osd_obj_map_fini(struct osd_device *dev); diff --git a/lustre/osd-ldiskfs/osd_oi.c b/lustre/osd-ldiskfs/osd_oi.c index 43ba6e8..bb3efa4 100644 --- a/lustre/osd-ldiskfs/osd_oi.c +++ b/lustre/osd-ldiskfs/osd_oi.c @@ -237,6 +237,7 @@ static int osd_oi_open(struct osd_thread_info *info, struct osd_device *osd, if (IS_ERR(inode)) RETURN(PTR_ERR(inode)); + ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_NO_OI); OBD_ALLOC_PTR(oi); if (oi == NULL) GOTO(out_inode, rc = -ENOMEM); @@ -441,7 +442,6 @@ static int osd_oi_iam_lookup(struct osd_thread_info *oti, { struct iam_container *bag; struct iam_iterator *it = &oti->oti_idx_it; - struct iam_rec *iam_rec; struct iam_path_descr *ipd; int rc; ENTRY; @@ -458,17 +458,8 @@ static int osd_oi_iam_lookup(struct osd_thread_info *oti, iam_it_init(it, bag, 0, ipd); rc = iam_it_get(it, (struct iam_key *)key); - if (rc >= 0) { - if 
(S_ISDIR(oi->oi_inode->i_mode)) - iam_rec = (struct iam_rec *)oti->oti_ldp; - else - iam_rec = (struct iam_rec *)rec; - - iam_reccpy(&it->ii_path.ip_leaf, (struct iam_rec *)iam_rec); - if (S_ISDIR(oi->oi_inode->i_mode)) - osd_fid_unpack((struct lu_fid *)rec, - (struct osd_fid_pack *)iam_rec); - } + if (rc > 0) + iam_reccpy(&it->ii_path.ip_leaf, (struct iam_rec *)rec); iam_it_put(it); iam_it_fini(it); osd_ipd_put(oti->oti_env, bag, ipd); @@ -523,12 +514,14 @@ int __osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd, } int osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd, - const struct lu_fid *fid, struct osd_inode_id *id) + const struct lu_fid *fid, struct osd_inode_id *id, + bool check_fld) { int rc = 0; - if ((!fid_is_last_id(fid) && fid_is_on_ost(info, osd, fid)) || - fid_is_llog(fid)) { + if ((!fid_is_last_id(fid) && check_fld && + fid_is_on_ost(info, osd, fid)) || + fid_is_llog(fid)) { /* old OSD obj id */ /* FIXME: actually for all of the OST object */ rc = osd_obj_map_lookup(info, osd, fid, id); diff --git a/lustre/osd-ldiskfs/osd_oi.h b/lustre/osd-ldiskfs/osd_oi.h index bb97b49..2837ddc 100644 --- a/lustre/osd-ldiskfs/osd_oi.h +++ b/lustre/osd-ldiskfs/osd_oi.h @@ -131,7 +131,8 @@ void osd_oi_fini(struct osd_thread_info *info, struct osd_device *osd); int __osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd, const struct lu_fid *fid, struct osd_inode_id *id); int osd_oi_lookup(struct osd_thread_info *info, struct osd_device *osd, - const struct lu_fid *fid, struct osd_inode_id *id); + const struct lu_fid *fid, struct osd_inode_id *id, + bool check_fld); int osd_oi_insert(struct osd_thread_info *info, struct osd_device *osd, const struct lu_fid *fid, const struct osd_inode_id *id, struct thandle *th); diff --git a/lustre/osd-ldiskfs/osd_quota.c b/lustre/osd-ldiskfs/osd_quota.c index 8c17b13..4e911e3 100644 --- a/lustre/osd-ldiskfs/osd_quota.c +++ b/lustre/osd-ldiskfs/osd_quota.c @@ -933,10 +933,6 @@ out: 
RETURN(rc); } -#define OBJECTS "OBJECTS" -#define ADMIN_USR "admin_quotafile_v2.usr" -#define ADMIN_GRP "admin_quotafile_v2.grp" - int osd_quota_migration(const struct lu_env *env, struct dt_object *dt, const struct dt_index_features *feat) { diff --git a/lustre/osd-ldiskfs/osd_scrub.c b/lustre/osd-ldiskfs/osd_scrub.c index 41791f6..62d5c6f 100644 --- a/lustre/osd-ldiskfs/osd_scrub.c +++ b/lustre/osd-ldiskfs/osd_scrub.c @@ -50,6 +50,17 @@ #define HALF_SEC (CFS_HZ >> 1) +#define SCRUB_NEXT_BREAK 1 /* exit current loop and process next group */ +#define SCRUB_NEXT_CONTINUE 2 /* skip current object and process next bit */ +#define SCRUB_NEXT_EXIT 3 /* exit all the loops */ +#define SCRUB_NEXT_WAIT 4 /* wait for free cache slot */ +#define SCRUB_NEXT_CRASH 5 /* simulate system crash during OI scrub */ +#define SCRUB_NEXT_FATAL 6 /* simulate failure during OI scrub */ +#define SCRUB_NEXT_NOSCRUB 7 /* new created object, no scrub on it */ +#define SCRUB_NEXT_NOLMA 8 /* the inode has no FID-in-LMA */ + +/* misc functions */ + static inline struct osd_device *osd_scrub2dev(struct osd_scrub *scrub) { return container_of0(scrub, struct osd_device, od_scrub); @@ -60,6 +71,93 @@ static inline struct super_block *osd_scrub2sb(struct osd_scrub *scrub) return osd_sb(osd_scrub2dev(scrub)); } +static inline int osd_scrub_has_window(struct osd_scrub *scrub, + struct osd_otable_cache *ooc) +{ + return scrub->os_pos_current < ooc->ooc_pos_preload + SCRUB_WINDOW_SIZE; +} + +static int osd_scrub_refresh_mapping(struct osd_thread_info *info, + struct osd_device *dev, + const struct lu_fid *fid, + const struct osd_inode_id *id, int ops) +{ + struct lu_fid *oi_fid = &info->oti_fid2; + struct osd_inode_id *oi_id = &info->oti_id2; + struct iam_container *bag; + struct iam_path_descr *ipd; + handle_t *jh; + int rc; + ENTRY; + + fid_cpu_to_be(oi_fid, fid); + osd_id_pack(oi_id, id); + jh = ldiskfs_journal_start_sb(osd_sb(dev), + osd_dto_credits_noquota[ops]); + if (IS_ERR(jh)) { + rc = 
PTR_ERR(jh); + CERROR("%.16s: fail to start trans for scrub store: rc = %d\n", + LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name, rc); + RETURN(rc); + } + + bag = &osd_fid2oi(dev, fid)->oi_dir.od_container; + ipd = osd_idx_ipd_get(info->oti_env, bag); + if (unlikely(ipd == NULL)) { + ldiskfs_journal_stop(jh); + CERROR("%.16s: fail to get ipd for scrub store\n", + LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name); + RETURN(-ENOMEM); + } + + if (ops == DTO_INDEX_UPDATE) { + rc = iam_update(jh, bag, (const struct iam_key *)oi_fid, + (struct iam_rec *)oi_id, ipd); + } else { + rc = iam_insert(jh, bag, (const struct iam_key *)oi_fid, + (struct iam_rec *)oi_id, ipd); + if (rc == -EEXIST) { + rc = 1; + /* XXX: There are trouble things when adding OI + * mapping for IGIF object, which may cause + * multiple objects to be mapped to the same + * IGIF formatted FID. Consider the following + * situations: + * + * 1) The MDT is upgrading from 1.8 device. + * The OI scrub generates IGIF FID1 for the + * OBJ1 and adds the OI mapping. + * + * 2) For some reason, the OI scrub does not + * process all the IGIF objects completely. + * + * 3) The MDT is backuped and restored against + * this device. + * + * 4) When the MDT mounts up, the OI scrub will + * try to rebuild the OI files. For some IGIF + * object, OBJ2, which was not processed by the + * OI scrub before the backup/restore, and the + * new generated IGIF formatted FID may be just + * the FID1, the same as OBJ1. + * + * Under such case, the OI scrub cannot know how + * to generate new FID for the OBJ2. + * + * Currently, we do nothing for that. One possible + * solution is to generate new normal FID for the + * conflict object. + * + * Anyway, it is rare, only exists in theory. 
*/ + } + } + osd_ipd_put(info->oti_env, bag, ipd); + ldiskfs_journal_stop(jh); + RETURN(rc); +} + +/* OI_scrub file ops */ + static void osd_scrub_file_to_cpu(struct scrub_file *des, struct scrub_file *src) { @@ -229,6 +327,8 @@ int osd_scrub_file_store(struct osd_scrub *scrub) return rc; } +/* OI scrub APIs */ + static int osd_scrub_prep(struct osd_device *dev) { struct osd_scrub *scrub = &dev->od_scrub; @@ -247,7 +347,7 @@ static int osd_scrub_prep(struct osd_device *dev) if (flags & SS_RESET) osd_scrub_file_reset(scrub, - LDISKFS_SB(osd_sb(dev))->s_es->s_uuid, sf->sf_flags); + LDISKFS_SB(osd_sb(dev))->s_es->s_uuid, 0); if (flags & SS_AUTO) { scrub->os_full_speed = 1; @@ -261,6 +361,7 @@ static int osd_scrub_prep(struct osd_device *dev) scrub->os_in_prior = 0; scrub->os_waiting = 0; + scrub->os_paused = 0; scrub->os_new_checked = 0; if (sf->sf_pos_last_checkpoint != 0) sf->sf_pos_latest_start = sf->sf_pos_last_checkpoint + 1; @@ -284,55 +385,48 @@ static int osd_scrub_prep(struct osd_device *dev) } static int -osd_scrub_error(struct osd_device *dev, struct osd_inode_id *lid, int rc) -{ - struct osd_scrub *scrub = &dev->od_scrub; - struct scrub_file *sf = &scrub->os_file; - - down_write(&scrub->os_rwsem); - scrub->os_new_checked++; - sf->sf_items_failed++; - if (sf->sf_pos_first_inconsistent == 0 || - sf->sf_pos_first_inconsistent > lid->oii_ino) - sf->sf_pos_first_inconsistent = lid->oii_ino; - up_write(&scrub->os_rwsem); - return sf->sf_param & SP_FAILOUT ? 
rc : 0; -} - -static int osd_scrub_check_update(struct osd_thread_info *info, struct osd_device *dev, - struct osd_idmap_cache *oic) + struct osd_idmap_cache *oic, int val) { struct osd_scrub *scrub = &dev->od_scrub; struct scrub_file *sf = &scrub->os_file; + struct lu_fid *fid = &oic->oic_fid; + struct osd_inode_id *lid = &oic->oic_lid; struct osd_inode_id *lid2 = &info->oti_id; - struct lu_fid *oi_fid = &info->oti_fid; - struct osd_inode_id *oi_id = &info->oti_id2; - handle_t *jh = NULL; struct osd_inconsistent_item *oii = NULL; struct inode *inode = NULL; - struct lu_fid *fid = &oic->oic_fid; - struct osd_inode_id *lid = &oic->oic_lid; - struct iam_container *bag; - struct iam_path_descr *ipd; int ops = DTO_INDEX_UPDATE; int idx; int rc; ENTRY; + down_write(&scrub->os_rwsem); + scrub->os_new_checked++; + if (val < 0) + GOTO(out, rc = val); + if (scrub->os_in_prior) oii = cfs_list_entry(oic, struct osd_inconsistent_item, oii_cache); - down_write(&scrub->os_rwsem); - scrub->os_new_checked++; if (lid->oii_ino < sf->sf_pos_latest_start && oii == NULL) GOTO(out, rc = 0); - if (oii != NULL && oii->oii_insert) + if (fid_is_igif(fid)) + sf->sf_items_igif++; + + if (val == SCRUB_NEXT_NOLMA && !dev->od_handle_nolma) + GOTO(out, rc = 0); + + if ((oii != NULL && oii->oii_insert) || (val == SCRUB_NEXT_NOLMA)) goto iget; - rc = osd_oi_lookup(info, dev, fid, lid2); + /* XXX: Currently, no FID-in-LMA for OST object, so osd_oi_lookup() + * wihtout checking FLD is enough. + * + * It should be updated if FID-in-LMA for OSD object introduced + * in the future. 
*/ + rc = osd_oi_lookup(info, dev, fid, lid2, false); if (rc != 0) { if (rc != -ENOENT) GOTO(out, rc); @@ -357,45 +451,16 @@ iget: ops = DTO_INDEX_INSERT; idx = osd_oi_fid2idx(dev, fid); + sf->sf_flags |= SF_RECREATED | SF_INCONSISTENT; if (unlikely(!ldiskfs_test_bit(idx, sf->sf_oi_bitmap))) ldiskfs_set_bit(idx, sf->sf_oi_bitmap); - sf->sf_flags |= SF_RECREATED; } else if (osd_id_eq(lid, lid2)) { - GOTO(out, rc = 0); - } - - sf->sf_flags |= SF_INCONSISTENT; - fid_cpu_to_be(oi_fid, fid); - osd_id_pack(oi_id, &oic->oic_lid); - jh = ldiskfs_journal_start_sb(osd_sb(dev), - osd_dto_credits_noquota[ops]); - if (IS_ERR(jh)) { - rc = PTR_ERR(jh); - CERROR("%.16s: fail to start trans for scrub store, rc = %d\n", - LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name, rc); - GOTO(out, rc); - } - - bag = &osd_fid2oi(dev, fid)->oi_dir.od_container; - ipd = osd_idx_ipd_get(info->oti_env, bag); - if (unlikely(ipd == NULL)) { - ldiskfs_journal_stop(jh); - CERROR("%.16s: fail to get ipd for scrub store\n", - LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name); - GOTO(out, rc = -ENOMEM); - } - - if (ops == DTO_INDEX_UPDATE) { - rc = iam_update(jh, bag, (const struct iam_key *)oi_fid, - (struct iam_rec *)oi_id, ipd); + GOTO(out, rc = 0); } else { - rc = iam_insert(jh, bag, (const struct iam_key *)oi_fid, - (struct iam_rec *)oi_id, ipd); - if (rc == -EEXIST) - rc = 1; + sf->sf_flags |= SF_INCONSISTENT; } - osd_ipd_put(info->oti_env, bag, ipd); - ldiskfs_journal_stop(jh); + + rc = osd_scrub_refresh_mapping(info, dev, fid, lid, ops); if (rc == 0) { if (scrub->os_in_prior) sf->sf_items_updated_prior++; @@ -432,11 +497,15 @@ out: RETURN(sf->sf_param & SP_FAILOUT ? 
rc : 0); } -static int do_osd_scrub_checkpoint(struct osd_scrub *scrub) +static int osd_scrub_checkpoint(struct osd_scrub *scrub) { struct scrub_file *sf = &scrub->os_file; int rc; - ENTRY; + + if (likely(cfs_time_before(cfs_time_current(), + scrub->os_time_next_checkpoint) || + scrub->os_new_checked == 0)) + return 0; down_write(&scrub->os_rwsem); sf->sf_items_checked += scrub->os_new_checked; @@ -448,16 +517,7 @@ static int do_osd_scrub_checkpoint(struct osd_scrub *scrub) rc = osd_scrub_file_store(scrub); up_write(&scrub->os_rwsem); - RETURN(rc); -} - -static inline int osd_scrub_checkpoint(struct osd_scrub *scrub) -{ - if (unlikely(cfs_time_beforeq(scrub->os_time_next_checkpoint, - cfs_time_current()) && - scrub->os_new_checked > 0)) - return do_osd_scrub_checkpoint(scrub); - return 0; + return rc; } static void osd_scrub_post(struct osd_scrub *scrub, int result) @@ -501,14 +561,7 @@ static void osd_scrub_post(struct osd_scrub *scrub, int result) EXIT; } -#define SCRUB_NEXT_BREAK 1 /* exit current loop and process next group */ -#define SCRUB_NEXT_CONTINUE 2 /* skip current object and process next bit */ -#define SCRUB_NEXT_EXIT 3 /* exit all the loops */ -#define SCRUB_NEXT_WAIT 4 /* wait for free cache slot */ -#define SCRUB_NEXT_CRASH 5 /* simulate system crash during OI scrub */ -#define SCRUB_NEXT_FATAL 6 /* simulate failure during OI scrub */ -#define SCRUB_NEXT_NOSCRUB 7 /* new created object, no scrub on it */ -#define SCRUB_NEXT_IGIF 8 /* IGIF object */ +/* iteration engine */ struct osd_iit_param { struct super_block *sb; @@ -530,12 +583,6 @@ typedef int (*osd_iit_exec_policy)(struct osd_thread_info *info, struct osd_idmap_cache *oic, int *noslot, int rc); -static inline int osd_scrub_has_window(struct osd_scrub *scrub, - struct osd_otable_cache *ooc) -{ - return scrub->os_pos_current < ooc->ooc_pos_preload + SCRUB_WINDOW_SIZE; -} - static int osd_iit_next(struct osd_iit_param *param, __u32 *pos) { param->offset = 
ldiskfs_find_next_bit(param->bitmap->b_data, @@ -551,13 +598,14 @@ static int osd_iit_next(struct osd_iit_param *param, __u32 *pos) static int osd_iit_iget(struct osd_thread_info *info, struct osd_device *dev, struct lu_fid *fid, struct osd_inode_id *lid, __u32 pos, - struct super_block *sb, struct inode **pinode) + struct super_block *sb, bool scrub) { - struct inode *inode; - int rc; + struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs; + struct inode *inode; + int rc; osd_id_gen(lid, pos, OSD_OII_NOGEN); - inode = osd_iget_fid(info, dev, lid, fid); + inode = osd_iget(info, dev, lid); if (IS_ERR(inode)) { rc = PTR_ERR(inode); /* The inode may be removed after bitmap searching, or the @@ -570,8 +618,38 @@ static int osd_iit_iget(struct osd_thread_info *info, struct osd_device *dev, return rc; } - *pinode = inode; - return 0; + /* If the inode has no OI mapping, then it is special locally used, + * should be invisible to OI scrub or up layer LFSCK. */ + if (ldiskfs_test_inode_state(inode, LDISKFS_STATE_LUSTRE_NO_OI)) { + iput(inode); + return SCRUB_NEXT_CONTINUE; + } + + if (scrub && + ldiskfs_test_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB)) { + /* Only skip it for the first OI scrub accessing. 
*/ + ldiskfs_clear_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB); + iput(inode); + return SCRUB_NEXT_NOSCRUB; + } + + rc = osd_get_lma(info, inode, &info->oti_obj_dentry, lma); + if (rc == 0) { + if (!scrub) { + if (!fid_is_client_visible(&lma->lma_self_fid)) + rc = SCRUB_NEXT_CONTINUE; + else + *fid = lma->lma_self_fid; + } + } else if (rc == -ENODATA) { + lu_igif_build(fid, inode->i_ino, inode->i_generation); + if (scrub) + rc = SCRUB_NEXT_NOLMA; + else + rc = 0; + } + iput(inode); + return rc; } static int osd_scrub_next(struct osd_thread_info *info, struct osd_device *dev, @@ -582,7 +660,6 @@ static int osd_scrub_next(struct osd_thread_info *info, struct osd_device *dev, struct ptlrpc_thread *thread = &scrub->os_thread; struct lu_fid *fid; struct osd_inode_id *lid; - struct inode *inode; int rc; if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_DELAY) && cfs_fail_val > 0) { @@ -629,19 +706,7 @@ static int osd_scrub_next(struct osd_thread_info *info, struct osd_device *dev, fid = &(*oic)->oic_fid; lid = &(*oic)->oic_lid; rc = osd_iit_iget(info, dev, fid, lid, - scrub->os_pos_current, param->sb, &inode); - if (rc != 0) - return rc; - - if (inode->i_state & I_LUSTRE_NOSCRUB) { - /* Only skip it for the first OI scrub accessing. 
*/ - inode->i_state &= ~I_LUSTRE_NOSCRUB; - rc = SCRUB_NEXT_NOSCRUB; - } else if (!fid_is_norm(fid)) { - rc = SCRUB_NEXT_IGIF; - } - - iput(inode); + scrub->os_pos_current, param->sb, true); return rc; } @@ -652,7 +717,6 @@ static int osd_preload_next(struct osd_thread_info *info, struct osd_otable_cache *ooc = &dev->od_otable_it->ooi_cache; struct osd_scrub *scrub; struct ptlrpc_thread *thread; - struct inode *inode; int rc; rc = osd_iit_next(param, &ooc->ooc_pos_preload); @@ -668,12 +732,10 @@ static int osd_preload_next(struct osd_thread_info *info, rc = osd_iit_iget(info, dev, &ooc->ooc_cache[ooc->ooc_producer_idx].oic_fid, &ooc->ooc_cache[ooc->ooc_producer_idx].oic_lid, - ooc->ooc_pos_preload, param->sb, &inode); + ooc->ooc_pos_preload, param->sb, false); /* If succeed, it needs to move forward; otherwise up layer LFSCK may * ignore the failure, so it still need to skip the inode next time. */ ooc->ooc_pos_preload = param->gbase + ++(param->offset); - if (rc == 0) - iput(inode); return rc; } @@ -684,7 +746,6 @@ static int osd_scrub_exec(struct osd_thread_info *info, struct osd_device *dev, struct l_wait_info lwi = { 0 }; struct osd_scrub *scrub = &dev->od_scrub; struct scrub_file *sf = &scrub->os_file; - __u64 *items = NULL; struct ptlrpc_thread *thread = &scrub->os_thread; struct osd_otable_it *it = dev->od_otable_it; struct osd_otable_cache *ooc = it ? 
&it->ooi_cache : NULL; @@ -695,27 +756,14 @@ static int osd_scrub_exec(struct osd_thread_info *info, struct osd_device *dev, case SCRUB_NEXT_WAIT: goto wait; case SCRUB_NEXT_NOSCRUB: - items = &sf->sf_items_noscrub; - break; - case SCRUB_NEXT_IGIF: - items = &sf->sf_items_igif; - break; - } - - if (items != NULL) { down_write(&scrub->os_rwsem); scrub->os_new_checked++; - (*items)++; + sf->sf_items_noscrub++; up_write(&scrub->os_rwsem); goto next; } - LASSERTF(rc <= 0, "unexpected rc = %d\n", rc); - - if (rc != 0) - rc = osd_scrub_error(dev, &oic->oic_lid, rc); - else - rc = osd_scrub_check_update(info, dev, oic); + rc = osd_scrub_check_update(info, dev, oic, rc); if (rc != 0) return rc; @@ -860,6 +908,32 @@ next_group: RETURN(0); } +static int osd_otable_it_preload(const struct lu_env *env, + struct osd_otable_it *it) +{ + struct osd_device *dev = it->ooi_dev; + struct osd_scrub *scrub = &dev->od_scrub; + struct osd_otable_cache *ooc = &it->ooi_cache; + int rc; + ENTRY; + + rc = osd_inode_iteration(osd_oti_get(env), dev, + OSD_OTABLE_IT_CACHE_SIZE, 1); + if (rc == SCRUB_IT_ALL) + it->ooi_all_cached = 1; + + CDEBUG(D_LFSCK, "OSD pre-loaded: max = %u, preload = %u, rc = %d\n", + le32_to_cpu(LDISKFS_SB(osd_sb(dev))->s_es->s_inodes_count), + ooc->ooc_pos_preload, rc); + + if (scrub->os_waiting && osd_scrub_has_window(scrub, ooc)) { + scrub->os_waiting = 0; + cfs_waitq_broadcast(&scrub->os_thread.t_ctl_waitq); + } + + RETURN(rc < 0 ? 
rc : ooc->ooc_cached_items); +} + static int osd_scrub_main(void *args) { struct lu_env env; @@ -932,6 +1006,8 @@ noenv: return rc; } +/* OI scrub start/stop */ + static int do_osd_scrub_start(struct osd_device *dev, __u32 flags) { struct osd_scrub *scrub = &dev->od_scrub; @@ -1017,6 +1093,8 @@ static void osd_scrub_stop(struct osd_device *dev) mutex_unlock(&dev->od_otable_mutex); } +/* OI scrub setup/cleanup */ + static const char osd_scrub_name[] = "OI_scrub"; int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev) @@ -1055,6 +1133,8 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev) scrub->os_inode = igrab(filp->f_dentry->d_inode); filp_close(filp, 0); pop_ctxt(&saved, ctxt, NULL); + ldiskfs_set_inode_state(scrub->os_inode, + LDISKFS_STATE_LUSTRE_NO_OI); rc = osd_scrub_file_load(scrub); if (rc == -ENOENT) { @@ -1135,6 +1215,8 @@ void osd_scrub_cleanup(const struct lu_env *env, struct osd_device *dev) osd_oi_fini(osd_oti_get(env), dev); } +/* object table based iteration APIs */ + static struct dt_it *osd_otable_it_init(const struct lu_env *env, struct dt_object *dt, __u32 attr, struct lustre_capa *capa) @@ -1252,32 +1334,6 @@ static int osd_otable_it_get(const struct lu_env *env, RETURN(0); } -static int osd_otable_it_preload(const struct lu_env *env, - struct osd_otable_it *it) -{ - struct osd_device *dev = it->ooi_dev; - struct osd_scrub *scrub = &dev->od_scrub; - struct osd_otable_cache *ooc = &it->ooi_cache; - int rc; - ENTRY; - - rc = osd_inode_iteration(osd_oti_get(env), dev, - OSD_OTABLE_IT_CACHE_SIZE, 1); - if (rc == SCRUB_IT_ALL) - it->ooi_all_cached = 1; - - CDEBUG(D_LFSCK, "OSD pre-loaded: max = %u, preload = %u, rc = %d\n", - le32_to_cpu(LDISKFS_SB(osd_sb(dev))->s_es->s_inodes_count), - ooc->ooc_pos_preload, rc); - - if (scrub->os_waiting && osd_scrub_has_window(scrub, ooc)) { - scrub->os_waiting = 0; - cfs_waitq_broadcast(&scrub->os_thread.t_ctl_waitq); - } - - RETURN(rc < 0 ? 
rc : ooc->ooc_cached_items); -} - static int osd_otable_it_next(const struct lu_env *env, struct dt_it *di) { struct osd_otable_it *it = (struct osd_otable_it *)di; @@ -1388,6 +1444,8 @@ const struct dt_index_operations osd_otable_ops = { } }; +/* high priority inconsistent items list APIs */ + int osd_oii_insert(struct osd_device *dev, struct osd_idmap_cache *oic, int insert) { @@ -1443,6 +1501,8 @@ int osd_oii_lookup(struct osd_device *dev, const struct lu_fid *fid, RETURN(-ENOENT); } +/* OI scrub dump */ + static const char *scrub_status_names[] = { "init", "scanning", @@ -1540,7 +1600,7 @@ int osd_scrub_dump(struct osd_device *dev, char *buf, int len) down_read(&scrub->os_rwsem); rc = snprintf(buf, len, - "name: OI scrub\n" + "name: OI_scrub\n" "magic: 0x%x\n" "oi_files: %d\n" "status: %s\n", diff --git a/lustre/osd-ldiskfs/osd_scrub.h b/lustre/osd-ldiskfs/osd_scrub.h index 831dd67..fcb0ae3 100644 --- a/lustre/osd-ldiskfs/osd_scrub.h +++ b/lustre/osd-ldiskfs/osd_scrub.h @@ -140,7 +140,7 @@ struct scrub_file { /* How many prior objects have been updated during scanning. */ __u64 sf_items_updated_prior; - /* How many objects marked as I_LUSTRE_NOSCRUB. */ + /* How many objects marked as LDISKFS_STATE_LUSTRE_NOSCRUB. */ __u64 sf_items_noscrub; /* How many IGIF objects. */ -- 1.8.3.1