From c5a411dac119b63943d60a0d6875ce15275c67f8 Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Tue, 15 Jan 2013 17:50:49 +0800 Subject: [PATCH] LU-1866 lfsck: FID-in-{dirent,LMA} check and repair As part of LFSCK 1.5, this patch implements the following functionalities: 1) Verify and rebuild FID-in-dirent, mainly for MDT file-level backup/restore. 2) (Partly) verify and rebuild FID-in-LMA: if the FID-in-dirent is valid but FID-in-LMA is missing, then the latter one will be repaired; but if both are missing, then it is OI scrub's duty to repair the inconsistency. 3) (Partly) handle IGIF objects upgrading: build IGIF-in-dirent for IGIF objects. The other two parts of the work for IGIF objects upgrading are: 3.1) Generate IGIF-in-LMA, which has been done by OI scrub in former patch(es). 3.2) Generate linkEA, which will be done by upper layer LFSCK in other patch(es). Test-Parameters: envdefinitions=ENABLE_QUOTA=yes testlist=sanity-lfsck Signed-off-by: Fan Yong Change-Id: Ibb317f0f3f3e9cf6db56bfa25f55d2017b0e1937 Reviewed-on: http://review.whamcloud.com/4912 Tested-by: Hudson Reviewed-by: Alex Zhuravlev Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/include/lustre/lustre_idl.h | 77 +++--- lustre/include/obd_support.h | 5 + lustre/obdclass/lu_object.c | 1 - lustre/osd-ldiskfs/osd_handler.c | 465 +++++++++++++++++++++++++++++++++---- lustre/osd-ldiskfs/osd_internal.h | 1 + lustre/osd-ldiskfs/osd_scrub.c | 3 +- lustre/tests/sanity-lfsck.sh | 128 ++++++++++ 7 files changed, 596 insertions(+), 84 deletions(-) diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index 0750b87..0b2adb4 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -729,50 +729,46 @@ static inline void lu_igif_build(struct lu_fid *fid, __u32 ino, __u32 gen) */ static inline void fid_cpu_to_le(struct lu_fid *dst, const struct lu_fid *src) { - /* check that all fields are converted */ - CLASSERT(sizeof *src == - sizeof
fid_seq(src) + - sizeof fid_oid(src) + sizeof fid_ver(src)); - LASSERTF(fid_is_igif(src) || fid_ver(src) == 0, DFID"\n", PFID(src)); - dst->f_seq = cpu_to_le64(fid_seq(src)); - dst->f_oid = cpu_to_le32(fid_oid(src)); - dst->f_ver = cpu_to_le32(fid_ver(src)); + /* check that all fields are converted */ + CLASSERT(sizeof *src == + sizeof fid_seq(src) + + sizeof fid_oid(src) + sizeof fid_ver(src)); + dst->f_seq = cpu_to_le64(fid_seq(src)); + dst->f_oid = cpu_to_le32(fid_oid(src)); + dst->f_ver = cpu_to_le32(fid_ver(src)); } static inline void fid_le_to_cpu(struct lu_fid *dst, const struct lu_fid *src) { - /* check that all fields are converted */ - CLASSERT(sizeof *src == - sizeof fid_seq(src) + - sizeof fid_oid(src) + sizeof fid_ver(src)); - dst->f_seq = le64_to_cpu(fid_seq(src)); - dst->f_oid = le32_to_cpu(fid_oid(src)); - dst->f_ver = le32_to_cpu(fid_ver(src)); - LASSERTF(fid_is_igif(dst) || fid_ver(dst) == 0, DFID"\n", PFID(dst)); + /* check that all fields are converted */ + CLASSERT(sizeof *src == + sizeof fid_seq(src) + + sizeof fid_oid(src) + sizeof fid_ver(src)); + dst->f_seq = le64_to_cpu(fid_seq(src)); + dst->f_oid = le32_to_cpu(fid_oid(src)); + dst->f_ver = le32_to_cpu(fid_ver(src)); } static inline void fid_cpu_to_be(struct lu_fid *dst, const struct lu_fid *src) { - /* check that all fields are converted */ - CLASSERT(sizeof *src == - sizeof fid_seq(src) + - sizeof fid_oid(src) + sizeof fid_ver(src)); - LASSERTF(fid_is_igif(src) || fid_ver(src) == 0, DFID"\n", PFID(src)); - dst->f_seq = cpu_to_be64(fid_seq(src)); - dst->f_oid = cpu_to_be32(fid_oid(src)); - dst->f_ver = cpu_to_be32(fid_ver(src)); + /* check that all fields are converted */ + CLASSERT(sizeof *src == + sizeof fid_seq(src) + + sizeof fid_oid(src) + sizeof fid_ver(src)); + dst->f_seq = cpu_to_be64(fid_seq(src)); + dst->f_oid = cpu_to_be32(fid_oid(src)); + dst->f_ver = cpu_to_be32(fid_ver(src)); } static inline void fid_be_to_cpu(struct lu_fid *dst, const struct lu_fid *src) { - /* check that 
all fields are converted */ - CLASSERT(sizeof *src == - sizeof fid_seq(src) + - sizeof fid_oid(src) + sizeof fid_ver(src)); - dst->f_seq = be64_to_cpu(fid_seq(src)); - dst->f_oid = be32_to_cpu(fid_oid(src)); - dst->f_ver = be32_to_cpu(fid_ver(src)); - LASSERTF(fid_is_igif(dst) || fid_ver(dst) == 0, DFID"\n", PFID(dst)); + /* check that all fields are converted */ + CLASSERT(sizeof *src == + sizeof fid_seq(src) + + sizeof fid_oid(src) + sizeof fid_ver(src)); + dst->f_seq = be64_to_cpu(fid_seq(src)); + dst->f_oid = be32_to_cpu(fid_oid(src)); + dst->f_ver = be32_to_cpu(fid_ver(src)); } static inline int fid_is_sane(const struct lu_fid *fid) @@ -791,17 +787,12 @@ static inline int fid_is_zero(const struct lu_fid *fid) extern void lustre_swab_lu_fid(struct lu_fid *fid); extern void lustre_swab_lu_seq_range(struct lu_seq_range *range); -static inline int lu_fid_eq(const struct lu_fid *f0, - const struct lu_fid *f1) -{ - /* Check that there is no alignment padding. */ - CLASSERT(sizeof *f0 == - sizeof f0->f_seq + sizeof f0->f_oid + sizeof f0->f_ver); - LASSERTF((fid_is_igif(f0) || fid_is_idif(f0)) || - fid_ver(f0) == 0, DFID, PFID(f0)); - LASSERTF((fid_is_igif(f1) || fid_is_idif(f1)) || - fid_ver(f1) == 0, DFID, PFID(f1)); - return memcmp(f0, f1, sizeof *f0) == 0; +static inline int lu_fid_eq(const struct lu_fid *f0, const struct lu_fid *f1) +{ + /* Check that there is no alignment padding. 
*/ + CLASSERT(sizeof *f0 == + sizeof f0->f_seq + sizeof f0->f_oid + sizeof f0->f_ver); + return memcmp(f0, f1, sizeof *f0) == 0; } #define __diff_normalize(val0, val1) \ diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 79ec6aa..43a69f9 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -463,6 +463,11 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_UPDATE_OBJ_NET 0x1500 +#define OBD_FAIL_FID_INDIR 0x1501 +#define OBD_FAIL_FID_INLMA 0x1502 +#define OBD_FAIL_FID_LOOKUP 0x1505 +#define OBD_FAIL_FID_NOLMA 0x1506 + /* LFSCK */ #define OBD_FAIL_LFSCK_DELAY1 0x1600 #define OBD_FAIL_LFSCK_DELAY2 0x1601 diff --git a/lustre/obdclass/lu_object.c b/lustre/obdclass/lu_object.c index 09c189b..6392139 100644 --- a/lustre/obdclass/lu_object.c +++ b/lustre/obdclass/lu_object.c @@ -196,7 +196,6 @@ static struct lu_object *lu_object_alloc(const struct lu_env *env, * This is the only place where object fid is assigned. It's constant * after this point. 
*/ - LASSERT(fid_is_igif(f) || fid_ver(f) == 0); top->lo_header->loh_fid = *f; layers = &top->lo_header->loh_layers; do { diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index a36f86a..fcde844 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -2219,6 +2219,9 @@ int osd_ea_fid_set(struct osd_thread_info *info, struct inode *inode, struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs; int rc; + if (OBD_FAIL_CHECK(OBD_FAIL_FID_INLMA)) + return 0; + lustre_lma_init(lma, fid); lustre_lma_swab(lma); @@ -3149,7 +3152,7 @@ static inline int osd_get_fid_from_dentry(struct ldiskfs_dir_entry_2 *de, rec = (struct osd_fid_pack *) (de->name + de->name_len + 1); rc = osd_fid_unpack((struct lu_fid *)fid, rec); } - RETURN(rc); + return rc; } static int osd_remote_fid(const struct lu_env *env, struct osd_device *osd, @@ -3544,8 +3547,18 @@ static int osd_ea_add_rec(const struct lu_env *env, struct osd_object *pobj, down_write(&pobj->oo_ext_idx_sem); } - rc = __osd_ea_add_rec(info, pobj, cinode, name, fid, - hlock, th); + if (OBD_FAIL_CHECK(OBD_FAIL_FID_INDIR)) { + struct lu_fid *tfid = &info->oti_fid; + + *tfid = *(const struct lu_fid *)fid; + tfid->f_ver = ~0; + rc = __osd_ea_add_rec(info, pobj, cinode, name, + (const struct dt_rec *)tfid, + hlock, th); + } else { + rc = __osd_ea_add_rec(info, pobj, cinode, name, fid, + hlock, th); + } } if (hlock != NULL) ldiskfs_htree_unlock(hlock); @@ -3604,6 +3617,31 @@ again: EXIT; } +static int osd_fail_fid_lookup(struct osd_thread_info *oti, + struct osd_device *dev, + struct osd_idmap_cache *oic, + struct lu_fid *fid, __u32 ino) +{ + struct lustre_mdt_attrs *lma = &oti->oti_mdt_attrs; + struct inode *inode; + int rc; + + osd_id_gen(&oic->oic_lid, ino, OSD_OII_NOGEN); + inode = osd_iget(oti, dev, &oic->oic_lid); + if (IS_ERR(inode)) { + fid_zero(&oic->oic_fid); + return PTR_ERR(inode); + } + + rc = osd_get_lma(oti, inode, &oti->oti_obj_dentry, lma); + iput(inode); + if (rc 
!= 0) + fid_zero(&oic->oic_fid); + else + *fid = oic->oic_fid = lma->lma_self_fid; + return rc; +} + /** * Calls ->lookup() to find dentry. From dentry get inode and * read inode's ea to get fid. This is required for interoperability @@ -3648,6 +3686,12 @@ static int osd_ea_lookup_rec(const struct lu_env *env, struct osd_object *obj, struct scrub_file *sf = &scrub->os_file; ino = le32_to_cpu(de->inode); + if (OBD_FAIL_CHECK(OBD_FAIL_FID_LOOKUP)) { + brelse(bh); + rc = osd_fail_fid_lookup(oti, dev, oic, fid, ino); + GOTO(out, rc); + } + rc = osd_get_fid_from_dentry(de, rec); /* done with de, release bh */ @@ -4037,44 +4081,41 @@ static int osd_it_iam_key_size(const struct lu_env *env, const struct dt_it *di) return iam_it_key_size(&it->oi_it); } -static inline void osd_it_append_attrs(struct lu_dirent *ent, __u32 attr, - int len, __u16 type) +static inline void +osd_it_append_attrs(struct lu_dirent *ent, int len, __u16 type) { - struct luda_type *lt; - const unsigned align = sizeof(struct luda_type) - 1; - - /* check if file type is required */ - if (attr & LUDA_TYPE) { - len = (len + align) & ~align; + /* check if file type is required */ + if (ent->lde_attrs & LUDA_TYPE) { + int align = sizeof(struct luda_type) - 1; + struct luda_type *lt; - lt = (void *) ent->lde_name + len; - lt->lt_type = cpu_to_le16(CFS_DTTOIF(type)); - ent->lde_attrs |= LUDA_TYPE; - } + len = (len + align) & ~align; + lt = (struct luda_type *)(ent->lde_name + len); + lt->lt_type = cpu_to_le16(CFS_DTTOIF(type)); + } - ent->lde_attrs = cpu_to_le32(ent->lde_attrs); + ent->lde_attrs = cpu_to_le32(ent->lde_attrs); } /** * build lu direct from backend fs dirent. 
*/ -static inline void osd_it_pack_dirent(struct lu_dirent *ent, - struct lu_fid *fid, __u64 offset, - char *name, __u16 namelen, - __u16 type, __u32 attr) +static inline void +osd_it_pack_dirent(struct lu_dirent *ent, struct lu_fid *fid, __u64 offset, + char *name, __u16 namelen, __u16 type, __u32 attr) { - fid_cpu_to_le(&ent->lde_fid, fid); - ent->lde_attrs = LUDA_FID; + ent->lde_attrs = attr | LUDA_FID; + fid_cpu_to_le(&ent->lde_fid, fid); - ent->lde_hash = cpu_to_le64(offset); - ent->lde_reclen = cpu_to_le16(lu_dirent_calc_size(namelen, attr)); + ent->lde_hash = cpu_to_le64(offset); + ent->lde_reclen = cpu_to_le16(lu_dirent_calc_size(namelen, attr)); - strncpy(ent->lde_name, name, namelen); - ent->lde_namelen = cpu_to_le16(namelen); + strncpy(ent->lde_name, name, namelen); + ent->lde_namelen = cpu_to_le16(namelen); - /* append lustre attributes */ - osd_it_append_attrs(ent, attr, namelen, type); + /* append lustre attributes */ + osd_it_append_attrs(ent, namelen, type); } /** @@ -4442,10 +4483,337 @@ static int osd_it_ea_key_size(const struct lu_env *env, const struct dt_it *di) return it->oie_dirent->oied_namelen; } +static int +osd_dirent_update(handle_t *jh, struct super_block *sb, + struct osd_it_ea_dirent *ent, struct lu_fid *fid, + struct buffer_head *bh, struct ldiskfs_dir_entry_2 *de) +{ + struct osd_fid_pack *rec; + int rc; + ENTRY; + + LASSERT(de->file_type & LDISKFS_DIRENT_LUFID); + LASSERT(de->rec_len >= de->name_len + sizeof(struct osd_fid_pack)); + + rc = ldiskfs_journal_get_write_access(jh, bh); + if (rc != 0) { + CERROR("%.16s: fail to write access for update dirent: " + "name = %.*s, rc = %d\n", + LDISKFS_SB(sb)->s_es->s_volume_name, + ent->oied_namelen, ent->oied_name, rc); + RETURN(rc); + } + + rec = (struct osd_fid_pack *)(de->name + de->name_len + 1); + fid_cpu_to_be((struct lu_fid *)rec->fp_area, fid); + rc = ldiskfs_journal_dirty_metadata(jh, bh); + if (rc != 0) + CERROR("%.16s: fail to dirty metadata for update dirent: " + "name = %.*s, 
rc = %d\n", + LDISKFS_SB(sb)->s_es->s_volume_name, + ent->oied_namelen, ent->oied_name, rc); + + RETURN(rc); +} + +static inline int +osd_dirent_has_space(__u16 reclen, __u16 namelen, unsigned blocksize) +{ + if (ldiskfs_rec_len_from_disk(reclen, blocksize) >= + __LDISKFS_DIR_REC_LEN(namelen + 1 + sizeof(struct osd_fid_pack))) + return 1; + else + return 0; +} + +static int +osd_dirent_reinsert(const struct lu_env *env, handle_t *jh, + struct inode *dir, struct inode *inode, + struct osd_it_ea_dirent *ent, struct lu_fid *fid, + struct buffer_head *bh, struct ldiskfs_dir_entry_2 *de, + struct htree_lock *hlock) +{ + struct dentry *dentry; + struct osd_fid_pack *rec; + struct ldiskfs_dentry_param *ldp; + int rc; + ENTRY; + + if (!LDISKFS_HAS_INCOMPAT_FEATURE(inode->i_sb, + LDISKFS_FEATURE_INCOMPAT_DIRDATA)) + RETURN(0); + + /* There is enough space to hold the FID-in-dirent. */ + if (osd_dirent_has_space(de->rec_len, ent->oied_namelen, + dir->i_sb->s_blocksize)) { + rc = ldiskfs_journal_get_write_access(jh, bh); + if (rc != 0) { + CERROR("%.16s: fail to write access for reinsert " + "dirent: name = %.*s, rc = %d\n", + LDISKFS_SB(inode->i_sb)->s_es->s_volume_name, + ent->oied_namelen, ent->oied_name, rc); + RETURN(rc); + } + + de->name[de->name_len] = 0; + rec = (struct osd_fid_pack *)(de->name + de->name_len + 1); + rec->fp_len = sizeof(struct lu_fid) + 1; + fid_cpu_to_be((struct lu_fid *)rec->fp_area, fid); + de->file_type |= LDISKFS_DIRENT_LUFID; + + rc = ldiskfs_journal_dirty_metadata(jh, bh); + if (rc != 0) + CERROR("%.16s: fail to dirty metadata for reinsert " + "dirent: name = %.*s, rc = %d\n", + LDISKFS_SB(inode->i_sb)->s_es->s_volume_name, + ent->oied_namelen, ent->oied_name, rc); + + RETURN(rc); + } + + rc = ldiskfs_delete_entry(jh, dir, de, bh); + if (rc != 0) { + CERROR("%.16s: fail to delete entry for reinsert dirent: " + "name = %.*s, rc = %d\n", + LDISKFS_SB(inode->i_sb)->s_es->s_volume_name, + ent->oied_namelen, ent->oied_name, rc); + RETURN(rc); + } + 
+ dentry = osd_child_dentry_by_inode(env, dir, ent->oied_name, + ent->oied_namelen); + ldp = (struct ldiskfs_dentry_param *)osd_oti_get(env)->oti_ldp; + osd_get_ldiskfs_dirent_param(ldp, (const struct dt_rec *)fid); + dentry->d_fsdata = (void *)ldp; + ll_vfs_dq_init(dir); + rc = osd_ldiskfs_add_entry(jh, dentry, inode, hlock); + /* It is too bad, we cannot reinsert the name entry back. + * That means we lose it! */ + if (rc != 0) + CERROR("%.16s: fail to insert entry for reinsert dirent: " + "name = %.*s, rc = %d\n", + LDISKFS_SB(inode->i_sb)->s_es->s_volume_name, + ent->oied_namelen, ent->oied_name, rc); + + RETURN(rc); +} + +static int +osd_dirent_check_repair(const struct lu_env *env, struct osd_object *obj, + struct osd_it_ea *it, struct lu_fid *fid, + struct osd_inode_id *id, __u32 *attr) +{ + struct osd_thread_info *info = osd_oti_get(env); + struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs; + struct osd_device *dev = osd_obj2dev(obj); + struct super_block *sb = osd_sb(dev); + const char *devname = + LDISKFS_SB(sb)->s_es->s_volume_name; + struct osd_it_ea_dirent *ent = it->oie_dirent; + struct inode *dir = obj->oo_inode; + struct htree_lock *hlock = NULL; + struct buffer_head *bh = NULL; + handle_t *jh = NULL; + struct ldiskfs_dir_entry_2 *de; + struct dentry *dentry; + struct inode *inode; + int credits; + int rc; + bool dirty = false; + bool is_dotdot = false; + ENTRY; + + if (ent->oied_name[0] == '.') { + /* Skip dot entry, even if it has stale FID-in-dirent, because + * we do not use such FID-in-dirent anymore, it is harmless. */ + if (ent->oied_namelen == 1) + RETURN(0); + + if (ent->oied_namelen == 2 && ent->oied_name[1] == '.') + is_dotdot = true; + } + + dentry = osd_child_dentry_get(env, obj, ent->oied_name, + ent->oied_namelen); + + /* We need to ensure that the name entry is still valid. + * Because it may be removed or renamed by other already. 
+ * + * The unlink or rename operation will start journal before PDO lock, + * so to avoid deadlock, here we need to start journal handle before + * related PDO lock also. But because we do not know whether there + * will be something to be repaired before PDO lock, we just start + * journal without conditions. + * + * We may need to remove the name entry firstly, then insert back. + * One credit is for user quota file update. + * One credit is for group quota file update. + * Two credits are for dirty inode. */ + credits = osd_dto_credits_noquota[DTO_INDEX_DELETE] + + osd_dto_credits_noquota[DTO_INDEX_INSERT] + 1 + 1 + 2; + +again: + if (dev->od_dirent_journal) { + jh = ldiskfs_journal_start_sb(sb, credits); + if (IS_ERR(jh)) { + rc = PTR_ERR(jh); + CERROR("%.16s: fail to start trans for dirent " + "check_repair: credits %d, name %.*s, rc %d\n", + devname, credits, ent->oied_namelen, + ent->oied_name, rc); + RETURN(rc); + } + } + + if (obj->oo_hl_head != NULL) { + hlock = osd_oti_get(env)->oti_hlock; + ldiskfs_htree_lock(hlock, obj->oo_hl_head, dir, + LDISKFS_HLOCK_DEL); + } else { + down_write(&obj->oo_ext_idx_sem); + } + + bh = osd_ldiskfs_find_entry(dir, dentry, &de, hlock); + /* For dotdot entry, if there is not enough space to hold FID-in-dirent, + * just keep it there. It only happens when the device upgraded from 1.8 + * or restored from MDT file-level backup. For the whole directory, only + * dotdot entry has no FID-in-dirent and needs to get FID from LMA when + * readdir, it will not affect the performance much. 
*/ + if ((bh == NULL) || (le32_to_cpu(de->inode) != ent->oied_ino) || + (is_dotdot && !osd_dirent_has_space(de->rec_len, + ent->oied_namelen, + sb->s_blocksize))) { + *attr |= LUDA_IGNORE; + GOTO(out_journal, rc = 0); + } + + osd_id_gen(id, ent->oied_ino, OSD_OII_NOGEN); + inode = osd_iget(info, dev, id); + if (IS_ERR(inode)) { + rc = PTR_ERR(inode); + if (rc == -ENOENT || rc == -ESTALE) { + *attr |= LUDA_IGNORE; + rc = 0; + } + + GOTO(out_journal, rc); + } + + rc = osd_get_lma(info, inode, &info->oti_obj_dentry, lma); + if (rc == 0) { + if (fid_is_sane(fid)) { + /* FID-in-dirent is valid. */ + if (lu_fid_eq(fid, &lma->lma_self_fid)) + GOTO(out_inode, rc = 0); + + /* Do not repair under dryrun mode. */ + if (*attr & LUDA_VERIFY_DRYRUN) { + *attr |= LUDA_REPAIR; + GOTO(out_inode, rc = 0); + } + + if (!dev->od_dirent_journal) { + iput(inode); + brelse(bh); + if (hlock != NULL) + ldiskfs_htree_unlock(hlock); + else + up_write(&obj->oo_ext_idx_sem); + dev->od_dirent_journal = 1; + goto again; + } + + *fid = lma->lma_self_fid; + dirty = true; + /* Update the FID-in-dirent. */ + rc = osd_dirent_update(jh, sb, ent, fid, bh, de); + if (rc == 0) + *attr |= LUDA_REPAIR; + } else { + /* Do not repair under dryrun mode. */ + if (*attr & LUDA_VERIFY_DRYRUN) { + *attr |= LUDA_REPAIR; + GOTO(out_inode, rc = 0); + } + + if (!dev->od_dirent_journal) { + iput(inode); + brelse(bh); + if (hlock != NULL) + ldiskfs_htree_unlock(hlock); + else + up_write(&obj->oo_ext_idx_sem); + dev->od_dirent_journal = 1; + goto again; + } + + *fid = lma->lma_self_fid; + dirty = true; + /* Append the FID-in-dirent. */ + rc = osd_dirent_reinsert(env, jh, dir, inode, ent, + fid, bh, de, hlock); + if (rc == 0) + *attr |= LUDA_REPAIR; + } + } else if (rc == -ENODATA) { + /* Do not repair under dryrun mode. 
*/ + if (*attr & LUDA_VERIFY_DRYRUN) { + if (fid_is_sane(fid)) + *attr |= LUDA_REPAIR; + else + *attr |= LUDA_UPGRADE; + GOTO(out_inode, rc = 0); + } + + if (!dev->od_dirent_journal) { + iput(inode); + brelse(bh); + if (hlock != NULL) + ldiskfs_htree_unlock(hlock); + else + up_write(&obj->oo_ext_idx_sem); + dev->od_dirent_journal = 1; + goto again; + } + + dirty = true; + if (unlikely(fid_is_sane(fid))) { + /* FID-in-dirent exists, but FID-in-LMA is lost. + * Trust the FID-in-dirent, and add FID-in-LMA. */ + rc = osd_ea_fid_set(info, inode, fid); + if (rc == 0) + *attr |= LUDA_REPAIR; + } else { + lu_igif_build(fid, inode->i_ino, inode->i_generation); + /* It is probably IGIF object. Only aappend the + * FID-in-dirent. OI scrub will process FID-in-LMA. */ + rc = osd_dirent_reinsert(env, jh, dir, inode, ent, + fid, bh, de, hlock); + if (rc == 0) + *attr |= LUDA_UPGRADE; + } + } + + GOTO(out_inode, rc); + +out_inode: + iput(inode); + +out_journal: + brelse(bh); + if (hlock != NULL) + ldiskfs_htree_unlock(hlock); + else + up_write(&obj->oo_ext_idx_sem); + if (jh != NULL) + ldiskfs_journal_stop(jh); + if (rc >= 0 && !dirty) + dev->od_dirent_journal = 0; + return rc; +} /** - * Returns the value (i.e. fid/igif) at current position from iterator's - * in memory structure. + * Returns the value at current position from iterator's in memory structure. * * \param di struct osd_it_ea, iterator's in memory structure * \param attr attr requested for dirent. 
@@ -4472,16 +4840,31 @@ static inline int osd_it_ea_rec(const struct lu_env *env, int rc = 0; ENTRY; - if (!fid_is_sane(fid)) { - rc = osd_ea_fid_get(env, obj, ino, fid, id); - if (rc != 0) { - fid_zero(&oic->oic_fid); - RETURN(rc); + if (attr & LUDA_VERIFY) { + attr |= LUDA_TYPE; + if (unlikely(ino == osd_sb(dev)->s_root->d_inode->i_ino)) { + attr |= LUDA_IGNORE; + rc = 0; + goto pack; } + + rc = osd_dirent_check_repair(env, obj, it, fid, id, &attr); } else { - osd_id_gen(id, ino, OSD_OII_NOGEN); + attr &= ~LU_DIRENT_ATTRS_MASK; + if (!fid_is_sane(fid)) { + if (OBD_FAIL_CHECK(OBD_FAIL_FID_LOOKUP)) + RETURN(-ENOENT); + + rc = osd_ea_fid_get(env, obj, ino, fid, id); + } else { + osd_id_gen(id, ino, OSD_OII_NOGEN); + } } + if (rc < 0) + RETURN(rc); + +pack: osd_it_pack_dirent(lde, fid, it->oie_dirent->oied_off, it->oie_dirent->oied_name, it->oie_dirent->oied_namelen, @@ -4490,9 +4873,13 @@ static inline int osd_it_ea_rec(const struct lu_env *env, if (osd_remote_fid(env, dev, fid)) RETURN(0); - oic->oic_lid = *id; - oic->oic_fid = *fid; - if ((scrub->os_pos_current <= ino) && + if (likely(!(attr & LUDA_IGNORE))) { + oic->oic_lid = *id; + oic->oic_fid = *fid; + } + + if (!(attr & LUDA_VERIFY) && + (scrub->os_pos_current <= ino) && ((sf->sf_flags & SF_INCONSISTENT) || (sf->sf_flags & SF_UPGRADE && fid_is_igif(fid)) || ldiskfs_test_bit(osd_oi_fid2idx(dev, fid), sf->sf_oi_bitmap))) diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index d4fe344..19560c5 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -267,6 +267,7 @@ struct osd_device { spinlock_t od_osfs_lock; unsigned int od_noscrub:1, + od_dirent_journal:1, od_handle_nolma:1; struct fsfilt_operations *od_fsops; diff --git a/lustre/osd-ldiskfs/osd_scrub.c b/lustre/osd-ldiskfs/osd_scrub.c index 464c132..858aa07 100644 --- a/lustre/osd-ldiskfs/osd_scrub.c +++ b/lustre/osd-ldiskfs/osd_scrub.c @@ -417,7 +417,8 @@ osd_scrub_check_update(struct 
osd_thread_info *info, struct osd_device *dev, if (fid_is_igif(fid)) sf->sf_items_igif++; - if (val == SCRUB_NEXT_NOLMA && !dev->od_handle_nolma) + if ((val == SCRUB_NEXT_NOLMA) && + (!dev->od_handle_nolma || OBD_FAIL_CHECK(OBD_FAIL_FID_NOLMA))) GOTO(out, rc = 0); if ((oii != NULL && oii->oii_insert) || (val == SCRUB_NEXT_NOLMA)) diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh index 3a550cf..6163542 100644 --- a/lustre/tests/sanity-lfsck.sh +++ b/lustre/tests/sanity-lfsck.sh @@ -21,6 +21,7 @@ init_logging skip "test LFSCK only for ldiskfs" && exit 0 require_dsh_mds || exit 0 +MCREATE=${MCREATE:-mcreate} SAVED_MDSSIZE=${MDSSIZE} SAVED_OSTSIZE=${OSTSIZE} # use small MDS + OST size to speed formatting time @@ -114,6 +115,133 @@ test_0() { } run_test 0 "Control LFSCK manually" +test_1a() { + lfsck_prep 1 1 + echo "start $SINGLEMDS" + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || + error "(1) Fail to start MDS!" + + mount_client $MOUNT || error "(2) Fail to start client!" + + #define OBD_FAIL_FID_INDIR 0x1501 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1501 + touch $DIR/$tdir/dummy + + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + umount_client $MOUNT + $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!" + + sleep 3 + local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "completed" ] || + error "(4) Expect 'completed', but got '$STATUS'" + + local repaired=$($SHOW_NAMESPACE | + awk '/^updated_phase1/ { print $2 }') + [ $repaired -eq 1 ] || + error "(5) Fail to repair crashed FID-in-dirent: $repaired" + + mount_client $MOUNT || error "(6) Fail to start client!" + + #define OBD_FAIL_FID_LOOKUP 0x1505 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505 + ls $DIR/$tdir/ > /dev/null || error "(7) no FID-in-dirent." 
+ + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 +} +run_test 1a "LFSCK can find out and repair crashed FID-in-dirent" + +test_1b() +{ + lfsck_prep 1 1 + echo "start $SINGLEMDS" + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || + error "(1) Fail to start MDS!" + + mount_client $MOUNT || error "(2) Fail to start client!" + + #define OBD_FAIL_FID_INLMA 0x1502 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1502 + touch $DIR/$tdir/dummy + + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + umount_client $MOUNT + #define OBD_FAIL_FID_NOLMA 0x1506 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1506 + $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!" + + sleep 3 + local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "completed" ] || + error "(4) Expect 'completed', but got '$STATUS'" + + local repaired=$($SHOW_NAMESPACE | + awk '/^updated_phase1/ { print $2 }') + [ $repaired -eq 1 ] || + error "(5) Fail to repair missed FID-in-LMA: $repaired" + + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + mount_client $MOUNT || error "(6) Fail to start client!" + + #define OBD_FAIL_FID_LOOKUP 0x1505 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505 + stat $DIR/$tdir/dummy > /dev/null || error "(7) no FID-in-LMA." + + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 +} +run_test 1b "LFSCK can find out and repair missed FID-in-LMA" + +test_4() +{ + lfsck_prep 3 3 + mds_backup_restore || error "(1) Fail to backup/restore!" + echo "start $SINGLEMDS with disabling OI scrub" + start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null || + error "(2) Fail to start MDS!" + + local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "init" ] || + error "(3) Expect 'init', but got '$STATUS'" + + #define OBD_FAIL_LFSCK_DELAY2 0x1601 + do_facet $SINGLEMDS $LCTL set_param fail_val=1 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601 + $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!" 
+ + sleep 5 + STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "scanning-phase1" ] || + error "(5) Expect 'scanning-phase1', but got '$STATUS'" + + local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }') + [ "$FLAGS" == "inconsistent" ] || + error "(6) Expect 'inconsistent', but got '$FLAGS'" + + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 + do_facet $SINGLEMDS $LCTL set_param fail_val=0 + sleep 3 + STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "completed" ] || + error "(7) Expect 'completed', but got '$STATUS'" + + FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }') + [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'" + + local repaired=$($SHOW_NAMESPACE | + awk '/^updated_phase1/ { print $2 }') + [ $repaired -ge 9 ] || + error "(9) Fail to repair crashed linkEA: $repaired" + + mount_client $MOUNT || error "(10) Fail to start client!" + + #define OBD_FAIL_FID_LOOKUP 0x1505 + do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505 + ls $DIR/$tdir/ > /dev/null || error "(11) no FID-in-dirent." + + do_facet $SINGLEMDS $LCTL set_param fail_loc=0 +} +run_test 4 "FID-in-dirent can be rebuilt after MDT file-level backup/restore" + test_6a() { lfsck_prep 10 10 echo "start $SINGLEMDS" -- 1.8.3.1