From: Fan Yong Date: Wed, 4 Sep 2013 14:32:51 +0000 (+0800) Subject: LU-3335 osd: self repair LMA by the RPC service thread X-Git-Tag: 2.5.51~4 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=fb1e9c68183549b030a68f89ed45c6bace1a3daa LU-3335 osd: self repair LMA by the RPC service thread The current OI scrub uses the FID in the LMA to locate the OI mapping and to repair that mapping when it is inconsistent. Such inode-table-based scanning does not guarantee that every OI inconsistency can be repaired: it cannot handle an inode without an LMA or with a corrupted LMA, and it cannot find dummy OI mappings (where the object for the OI mapping does not exist). In fact, after the inode-table-based scan, the OI scrub could make another, OI-table-based scan to resolve the above-mentioned issues, but such a scan would be nonlinear, so performance may be bad. Currently, we have another, relatively simple solution, although it is not perfect: we can make the RPC service thread fix the inconsistency it finds by itself. When verifying the LMA against the given FID, if no LMA is found, generate an LMA from the given FID and store the new LMA as an inode xattr. It is rare for an inode to have no LMA, so this additional work for the RPC service thread will not affect performance much. 
Signed-off-by: Fan Yong Change-Id: Ic6b8b533e56b6a0e388d5c38de7b113cb9bd6dad Reviewed-on: http://review.whamcloud.com/7144 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Alex Zhuravlev Reviewed-by: Andreas Dilger Reviewed-by: Mike Pershin --- diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index 241fc19..096dac0 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -319,6 +319,7 @@ int osd_get_idif(struct osd_thread_info *info, struct inode *inode, static int osd_check_lma(const struct lu_env *env, struct osd_object *obj) { struct osd_thread_info *info = osd_oti_get(env); + struct osd_device *osd = osd_obj2dev(obj); struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs; struct inode *inode = obj->oo_inode; struct dentry *dentry = &info->oti_obj_dentry; @@ -333,11 +334,41 @@ static int osd_check_lma(const struct lu_env *env, struct osd_object *obj) rc = __osd_xattr_get(inode, dentry, XATTR_NAME_LMA, info->oti_mdt_attrs_old, LMA_OLD_SIZE); if (rc == -ENODATA && !fid_is_igif(lu_object_fid(&obj->oo_dt.do_lu)) && - osd_obj2dev(obj)->od_check_ff) { + osd->od_check_ff) { fid = &lma->lma_self_fid; rc = osd_get_idif(info, inode, dentry, fid); - if (rc > 0) + if ((rc > 0) || (rc == -ENODATA && osd->od_lma_self_repair)) { + handle_t *jh; + + /* For the given OST-object, if it has neither LMA nor + * FID in XATTR_NAME_FID, then the given FID (which is + * contained in the @obj, from client RPC for locating + * the OST-object) is trusted. We use it to generate + * the LMA. */ + + LASSERT(current->journal_info == NULL); + + jh = ldiskfs_journal_start_sb(osd_sb(osd), + osd_dto_credits_noquota[DTO_XATTR_SET]); + if (IS_ERR(jh)) { + CWARN("%s: cannot start journal for " + "lma_self_repair: rc = %ld\n", + osd_name(osd), PTR_ERR(jh)); + RETURN(0); + } + + rc = osd_ea_fid_set(info, inode, + lu_object_fid(&obj->oo_dt.do_lu), + fid_is_on_ost(info, osd, + lu_object_fid(&obj->oo_dt.do_lu), + OI_CHECK_FLD) ? 
+ LMAC_FID_ON_OST : 0, 0); + if (rc != 0) + CWARN("%s: cannot self repair the LMA: " + "rc = %d\n", osd_name(osd), rc); + ldiskfs_journal_stop(jh); RETURN(0); + } } if (unlikely(rc == -ENODATA)) @@ -352,8 +383,7 @@ static int osd_check_lma(const struct lu_env *env, struct osd_object *obj) if (unlikely((lma->lma_incompat & ~LMA_INCOMPAT_SUPP) || CFS_FAIL_CHECK(OBD_FAIL_OSD_LMA_INCOMPAT))) { CWARN("%s: unsupported incompat LMA feature(s) %#x for " - "fid = "DFID", ino = %lu\n", - osd_obj2dev(obj)->od_svname, + "fid = "DFID", ino = %lu\n", osd_name(osd), lma->lma_incompat & ~LMA_INCOMPAT_SUPP, PFID(lu_object_fid(&obj->oo_dt.do_lu)), inode->i_ino); @@ -366,8 +396,7 @@ static int osd_check_lma(const struct lu_env *env, struct osd_object *obj) if (fid != NULL && unlikely(!lu_fid_eq(lu_object_fid(&obj->oo_dt.do_lu), fid))) { CDEBUG(D_INODE, "%s: FID "DFID" != self_fid "DFID"\n", - osd_obj2dev(obj)->od_svname, - PFID(lu_object_fid(&obj->oo_dt.do_lu)), + osd_name(osd), PFID(lu_object_fid(&obj->oo_dt.do_lu)), PFID(&lma->lma_self_fid)); rc = -EREMCHG; } @@ -5582,6 +5611,9 @@ static int osd_device_init0(const struct lu_env *env, if (rc != 0) GOTO(out_site, rc); + /* self-repair LMA by default */ + o->od_lma_self_repair = 1; + CFS_INIT_LIST_HEAD(&o->od_ios_list); /* setup scrub, including OI files initialization */ rc = osd_scrub_setup(env, o); diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index c5440ae..50539a6 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -240,7 +240,8 @@ struct osd_device { od_dirent_journal:1, od_igif_inoi:1, od_check_ff:1, - od_is_ost:1; + od_is_ost:1, + od_lma_self_repair:1; unsigned long od_capa_timeout; __u32 od_capa_alg; diff --git a/lustre/osd-ldiskfs/osd_lproc.c b/lustre/osd-ldiskfs/osd_lproc.c index 5171863..83bb586 100644 --- a/lustre/osd-ldiskfs/osd_lproc.c +++ b/lustre/osd-ldiskfs/osd_lproc.c @@ -514,6 +514,38 @@ int lprocfs_osd_wr_readcache(struct file *file, const 
char *buffer, return count; } +static int lprocfs_osd_rd_lma_self_repair(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct osd_device *dev = osd_dt_dev(data); + + LASSERT(dev != NULL); + if (unlikely(dev->od_mnt == NULL)) + return -EINPROGRESS; + + *eof = 1; + return snprintf(page, count, "%d\n", !!dev->od_lma_self_repair); +} + +static int lprocfs_osd_wr_lma_self_repair(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct osd_device *dev = osd_dt_dev(data); + int val; + int rc; + + LASSERT(dev != NULL); + if (unlikely(dev->od_mnt == NULL)) + return -EINPROGRESS; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + dev->od_lma_self_repair = !!val; + return count; +} + struct lprocfs_vars lprocfs_osd_obd_vars[] = { { "blocksize", lprocfs_dt_rd_blksize, 0, 0 }, { "kbytestotal", lprocfs_dt_rd_kbytestotal, 0, 0 }, @@ -534,6 +566,8 @@ struct lprocfs_vars lprocfs_osd_obd_vars[] = { lprocfs_osd_wr_wcache, 0 }, { "readcache_max_filesize", lprocfs_osd_rd_readcache, lprocfs_osd_wr_readcache, 0 }, + { "lma_self_repair", lprocfs_osd_rd_lma_self_repair, + lprocfs_osd_wr_lma_self_repair, 0, 0 }, { 0 } };