Whamcloud - gitweb
LU-1866 lfsck: FID-in-{dirent,LMA} check and repair
authorFan Yong <yong.fan@whamcloud.com>
Tue, 15 Jan 2013 09:50:49 +0000 (17:50 +0800)
committerOleg Drokin <oleg.drokin@intel.com>
Wed, 6 Feb 2013 21:02:21 +0000 (16:02 -0500)
As part of LFSCK 1.5, this patch implements the following functionality:

1) Verify and rebuild FID-in-dirent, mainly for MDT file-level
   backup/restore.

2) (Partly) verify and rebuild FID-in-LMA: if the FID-in-dirent
   is valid but FID-in-LMA is missing, then the latter will be
   repaired; but if both are missing, then it is OI scrub's duty
   to repair the inconsistency.

3) (Partly) handle IGIF object upgrading: build IGIF-in-dirent
   for IGIF objects. The other two parts of the IGIF object
   upgrade work are:

3.1) Generate IGIF-in-LMA, which has been done by OI scrub in
     former patch(es).

3.2) Generate linkEA, which will be done by the upper-layer LFSCK
     in other patch(es).

Test-Parameters: envdefinitions=ENABLE_QUOTA=yes testlist=sanity-lfsck

Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: Ibb317f0f3f3e9cf6db56bfa25f55d2017b0e1937
Reviewed-on: http://review.whamcloud.com/4912
Tested-by: Hudson
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/lustre/lustre_idl.h
lustre/include/obd_support.h
lustre/obdclass/lu_object.c
lustre/osd-ldiskfs/osd_handler.c
lustre/osd-ldiskfs/osd_internal.h
lustre/osd-ldiskfs/osd_scrub.c
lustre/tests/sanity-lfsck.sh

index 0750b87..0b2adb4 100644 (file)
@@ -729,50 +729,46 @@ static inline void lu_igif_build(struct lu_fid *fid, __u32 ino, __u32 gen)
  */
 static inline void fid_cpu_to_le(struct lu_fid *dst, const struct lu_fid *src)
 {
  */
 static inline void fid_cpu_to_le(struct lu_fid *dst, const struct lu_fid *src)
 {
-        /* check that all fields are converted */
-        CLASSERT(sizeof *src ==
-                 sizeof fid_seq(src) +
-                 sizeof fid_oid(src) + sizeof fid_ver(src));
-        LASSERTF(fid_is_igif(src) || fid_ver(src) == 0, DFID"\n", PFID(src));
-        dst->f_seq = cpu_to_le64(fid_seq(src));
-        dst->f_oid = cpu_to_le32(fid_oid(src));
-        dst->f_ver = cpu_to_le32(fid_ver(src));
+       /* check that all fields are converted */
+       CLASSERT(sizeof *src ==
+                sizeof fid_seq(src) +
+                sizeof fid_oid(src) + sizeof fid_ver(src));
+       dst->f_seq = cpu_to_le64(fid_seq(src));
+       dst->f_oid = cpu_to_le32(fid_oid(src));
+       dst->f_ver = cpu_to_le32(fid_ver(src));
 }
 
 static inline void fid_le_to_cpu(struct lu_fid *dst, const struct lu_fid *src)
 {
 }
 
 static inline void fid_le_to_cpu(struct lu_fid *dst, const struct lu_fid *src)
 {
-        /* check that all fields are converted */
-        CLASSERT(sizeof *src ==
-                 sizeof fid_seq(src) +
-                 sizeof fid_oid(src) + sizeof fid_ver(src));
-        dst->f_seq = le64_to_cpu(fid_seq(src));
-        dst->f_oid = le32_to_cpu(fid_oid(src));
-        dst->f_ver = le32_to_cpu(fid_ver(src));
-        LASSERTF(fid_is_igif(dst) || fid_ver(dst) == 0, DFID"\n", PFID(dst));
+       /* check that all fields are converted */
+       CLASSERT(sizeof *src ==
+                sizeof fid_seq(src) +
+                sizeof fid_oid(src) + sizeof fid_ver(src));
+       dst->f_seq = le64_to_cpu(fid_seq(src));
+       dst->f_oid = le32_to_cpu(fid_oid(src));
+       dst->f_ver = le32_to_cpu(fid_ver(src));
 }
 
 static inline void fid_cpu_to_be(struct lu_fid *dst, const struct lu_fid *src)
 {
 }
 
 static inline void fid_cpu_to_be(struct lu_fid *dst, const struct lu_fid *src)
 {
-        /* check that all fields are converted */
-        CLASSERT(sizeof *src ==
-                 sizeof fid_seq(src) +
-                 sizeof fid_oid(src) + sizeof fid_ver(src));
-        LASSERTF(fid_is_igif(src) || fid_ver(src) == 0, DFID"\n", PFID(src));
-        dst->f_seq = cpu_to_be64(fid_seq(src));
-        dst->f_oid = cpu_to_be32(fid_oid(src));
-        dst->f_ver = cpu_to_be32(fid_ver(src));
+       /* check that all fields are converted */
+       CLASSERT(sizeof *src ==
+                sizeof fid_seq(src) +
+                sizeof fid_oid(src) + sizeof fid_ver(src));
+       dst->f_seq = cpu_to_be64(fid_seq(src));
+       dst->f_oid = cpu_to_be32(fid_oid(src));
+       dst->f_ver = cpu_to_be32(fid_ver(src));
 }
 
 static inline void fid_be_to_cpu(struct lu_fid *dst, const struct lu_fid *src)
 {
 }
 
 static inline void fid_be_to_cpu(struct lu_fid *dst, const struct lu_fid *src)
 {
-        /* check that all fields are converted */
-        CLASSERT(sizeof *src ==
-                 sizeof fid_seq(src) +
-                 sizeof fid_oid(src) + sizeof fid_ver(src));
-        dst->f_seq = be64_to_cpu(fid_seq(src));
-        dst->f_oid = be32_to_cpu(fid_oid(src));
-        dst->f_ver = be32_to_cpu(fid_ver(src));
-        LASSERTF(fid_is_igif(dst) || fid_ver(dst) == 0, DFID"\n", PFID(dst));
+       /* check that all fields are converted */
+       CLASSERT(sizeof *src ==
+                sizeof fid_seq(src) +
+                sizeof fid_oid(src) + sizeof fid_ver(src));
+       dst->f_seq = be64_to_cpu(fid_seq(src));
+       dst->f_oid = be32_to_cpu(fid_oid(src));
+       dst->f_ver = be32_to_cpu(fid_ver(src));
 }
 
 static inline int fid_is_sane(const struct lu_fid *fid)
 }
 
 static inline int fid_is_sane(const struct lu_fid *fid)
@@ -791,17 +787,12 @@ static inline int fid_is_zero(const struct lu_fid *fid)
 extern void lustre_swab_lu_fid(struct lu_fid *fid);
 extern void lustre_swab_lu_seq_range(struct lu_seq_range *range);
 
 extern void lustre_swab_lu_fid(struct lu_fid *fid);
 extern void lustre_swab_lu_seq_range(struct lu_seq_range *range);
 
-static inline int lu_fid_eq(const struct lu_fid *f0,
-                            const struct lu_fid *f1)
-{
-        /* Check that there is no alignment padding. */
-        CLASSERT(sizeof *f0 ==
-                 sizeof f0->f_seq + sizeof f0->f_oid + sizeof f0->f_ver);
-        LASSERTF((fid_is_igif(f0) || fid_is_idif(f0)) ||
-                 fid_ver(f0) == 0, DFID, PFID(f0));
-        LASSERTF((fid_is_igif(f1) || fid_is_idif(f1)) ||
-                 fid_ver(f1) == 0, DFID, PFID(f1));
-        return memcmp(f0, f1, sizeof *f0) == 0;
+static inline int lu_fid_eq(const struct lu_fid *f0, const struct lu_fid *f1)
+{
+       /* Check that there is no alignment padding. */
+       CLASSERT(sizeof *f0 ==
+                sizeof f0->f_seq + sizeof f0->f_oid + sizeof f0->f_ver);
+       return memcmp(f0, f1, sizeof *f0) == 0;
 }
 
 #define __diff_normalize(val0, val1)                            \
 }
 
 #define __diff_normalize(val0, val1)                            \
index 79ec6aa..43a69f9 100644 (file)
@@ -463,6 +463,11 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
 
 #define OBD_FAIL_UPDATE_OBJ_NET                                0x1500
 
 
 #define OBD_FAIL_UPDATE_OBJ_NET                                0x1500
 
+#define OBD_FAIL_FID_INDIR     0x1501
+#define OBD_FAIL_FID_INLMA     0x1502
+#define OBD_FAIL_FID_LOOKUP    0x1505
+#define OBD_FAIL_FID_NOLMA     0x1506
+
 /* LFSCK */
 #define OBD_FAIL_LFSCK_DELAY1          0x1600
 #define OBD_FAIL_LFSCK_DELAY2          0x1601
 /* LFSCK */
 #define OBD_FAIL_LFSCK_DELAY1          0x1600
 #define OBD_FAIL_LFSCK_DELAY2          0x1601
index 09c189b..6392139 100644 (file)
@@ -196,7 +196,6 @@ static struct lu_object *lu_object_alloc(const struct lu_env *env,
          * This is the only place where object fid is assigned. It's constant
          * after this point.
          */
          * This is the only place where object fid is assigned. It's constant
          * after this point.
          */
-        LASSERT(fid_is_igif(f) || fid_ver(f) == 0);
         top->lo_header->loh_fid = *f;
         layers = &top->lo_header->loh_layers;
         do {
         top->lo_header->loh_fid = *f;
         layers = &top->lo_header->loh_layers;
         do {
index a36f86a..fcde844 100644 (file)
@@ -2219,6 +2219,9 @@ int osd_ea_fid_set(struct osd_thread_info *info, struct inode *inode,
        struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs;
        int                      rc;
 
        struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs;
        int                      rc;
 
+       if (OBD_FAIL_CHECK(OBD_FAIL_FID_INLMA))
+               return 0;
+
        lustre_lma_init(lma, fid);
        lustre_lma_swab(lma);
 
        lustre_lma_init(lma, fid);
        lustre_lma_swab(lma);
 
@@ -3149,7 +3152,7 @@ static inline int osd_get_fid_from_dentry(struct ldiskfs_dir_entry_2 *de,
                 rec = (struct osd_fid_pack *) (de->name + de->name_len + 1);
                 rc = osd_fid_unpack((struct lu_fid *)fid, rec);
         }
                 rec = (struct osd_fid_pack *) (de->name + de->name_len + 1);
                 rc = osd_fid_unpack((struct lu_fid *)fid, rec);
         }
-        RETURN(rc);
+       return rc;
 }
 
 static int osd_remote_fid(const struct lu_env *env, struct osd_device *osd,
 }
 
 static int osd_remote_fid(const struct lu_env *env, struct osd_device *osd,
@@ -3544,8 +3547,18 @@ static int osd_ea_add_rec(const struct lu_env *env, struct osd_object *pobj,
                        down_write(&pobj->oo_ext_idx_sem);
                 }
 
                        down_write(&pobj->oo_ext_idx_sem);
                 }
 
-                rc = __osd_ea_add_rec(info, pobj, cinode, name, fid,
-                                      hlock, th);
+               if (OBD_FAIL_CHECK(OBD_FAIL_FID_INDIR)) {
+                       struct lu_fid *tfid = &info->oti_fid;
+
+                       *tfid = *(const struct lu_fid *)fid;
+                       tfid->f_ver = ~0;
+                       rc = __osd_ea_add_rec(info, pobj, cinode, name,
+                                             (const struct dt_rec *)tfid,
+                                             hlock, th);
+               } else {
+                       rc = __osd_ea_add_rec(info, pobj, cinode, name, fid,
+                                             hlock, th);
+               }
         }
         if (hlock != NULL)
                 ldiskfs_htree_unlock(hlock);
         }
         if (hlock != NULL)
                 ldiskfs_htree_unlock(hlock);
@@ -3604,6 +3617,31 @@ again:
        EXIT;
 }
 
        EXIT;
 }
 
+static int osd_fail_fid_lookup(struct osd_thread_info *oti,
+                              struct osd_device *dev,
+                              struct osd_idmap_cache *oic,
+                              struct lu_fid *fid, __u32 ino)
+{
+       struct lustre_mdt_attrs *lma   = &oti->oti_mdt_attrs;
+       struct inode            *inode;
+       int                      rc;
+
+       osd_id_gen(&oic->oic_lid, ino, OSD_OII_NOGEN);
+       inode = osd_iget(oti, dev, &oic->oic_lid);
+       if (IS_ERR(inode)) {
+               fid_zero(&oic->oic_fid);
+               return PTR_ERR(inode);
+       }
+
+       rc = osd_get_lma(oti, inode, &oti->oti_obj_dentry, lma);
+       iput(inode);
+       if (rc != 0)
+               fid_zero(&oic->oic_fid);
+       else
+               *fid = oic->oic_fid = lma->lma_self_fid;
+       return rc;
+}
+
 /**
  * Calls ->lookup() to find dentry. From dentry get inode and
  * read inode's ea to get fid. This is required for  interoperability
 /**
  * Calls ->lookup() to find dentry. From dentry get inode and
  * read inode's ea to get fid. This is required for  interoperability
@@ -3648,6 +3686,12 @@ static int osd_ea_lookup_rec(const struct lu_env *env, struct osd_object *obj,
                struct scrub_file *sf = &scrub->os_file;
 
                ino = le32_to_cpu(de->inode);
                struct scrub_file *sf = &scrub->os_file;
 
                ino = le32_to_cpu(de->inode);
+               if (OBD_FAIL_CHECK(OBD_FAIL_FID_LOOKUP)) {
+                       brelse(bh);
+                       rc = osd_fail_fid_lookup(oti, dev, oic, fid, ino);
+                       GOTO(out, rc);
+               }
+
                rc = osd_get_fid_from_dentry(de, rec);
 
                /* done with de, release bh */
                rc = osd_get_fid_from_dentry(de, rec);
 
                /* done with de, release bh */
@@ -4037,44 +4081,41 @@ static int osd_it_iam_key_size(const struct lu_env *env, const struct dt_it *di)
         return iam_it_key_size(&it->oi_it);
 }
 
         return iam_it_key_size(&it->oi_it);
 }
 
-static inline void osd_it_append_attrs(struct lu_dirent *ent, __u32 attr,
-                                       int len, __u16 type)
+static inline void
+osd_it_append_attrs(struct lu_dirent *ent, int len, __u16 type)
 {
 {
-        struct luda_type *lt;
-        const unsigned    align = sizeof(struct luda_type) - 1;
-
-        /* check if file type is required */
-        if (attr & LUDA_TYPE) {
-                        len = (len + align) & ~align;
+       /* check if file type is required */
+       if (ent->lde_attrs & LUDA_TYPE) {
+               int align = sizeof(struct luda_type) - 1;
+               struct luda_type *lt;
 
 
-                        lt = (void *) ent->lde_name + len;
-                        lt->lt_type = cpu_to_le16(CFS_DTTOIF(type));
-                        ent->lde_attrs |= LUDA_TYPE;
-        }
+               len = (len + align) & ~align;
+               lt = (struct luda_type *)(ent->lde_name + len);
+               lt->lt_type = cpu_to_le16(CFS_DTTOIF(type));
+       }
 
 
-        ent->lde_attrs = cpu_to_le32(ent->lde_attrs);
+       ent->lde_attrs = cpu_to_le32(ent->lde_attrs);
 }
 
 /**
  * build lu direct from backend fs dirent.
  */
 
 }
 
 /**
  * build lu direct from backend fs dirent.
  */
 
-static inline void osd_it_pack_dirent(struct lu_dirent *ent,
-                                      struct lu_fid *fid, __u64 offset,
-                                      char *name, __u16 namelen,
-                                      __u16 type, __u32 attr)
+static inline void
+osd_it_pack_dirent(struct lu_dirent *ent, struct lu_fid *fid, __u64 offset,
+                  char *name, __u16 namelen, __u16 type, __u32 attr)
 {
 {
-        fid_cpu_to_le(&ent->lde_fid, fid);
-        ent->lde_attrs = LUDA_FID;
+       ent->lde_attrs = attr | LUDA_FID;
+       fid_cpu_to_le(&ent->lde_fid, fid);
 
 
-        ent->lde_hash = cpu_to_le64(offset);
-        ent->lde_reclen = cpu_to_le16(lu_dirent_calc_size(namelen, attr));
+       ent->lde_hash = cpu_to_le64(offset);
+       ent->lde_reclen = cpu_to_le16(lu_dirent_calc_size(namelen, attr));
 
 
-        strncpy(ent->lde_name, name, namelen);
-        ent->lde_namelen = cpu_to_le16(namelen);
+       strncpy(ent->lde_name, name, namelen);
+       ent->lde_namelen = cpu_to_le16(namelen);
 
 
-        /* append lustre attributes */
-        osd_it_append_attrs(ent, attr, namelen, type);
+       /* append lustre attributes */
+       osd_it_append_attrs(ent, namelen, type);
 }
 
 /**
 }
 
 /**
@@ -4442,10 +4483,337 @@ static int osd_it_ea_key_size(const struct lu_env *env, const struct dt_it *di)
         return it->oie_dirent->oied_namelen;
 }
 
         return it->oie_dirent->oied_namelen;
 }
 
+static int
+osd_dirent_update(handle_t *jh, struct super_block *sb,
+                 struct osd_it_ea_dirent *ent, struct lu_fid *fid,
+                 struct buffer_head *bh, struct ldiskfs_dir_entry_2 *de)
+{
+       struct osd_fid_pack *rec;
+       int                  rc;
+       ENTRY;
+
+       LASSERT(de->file_type & LDISKFS_DIRENT_LUFID);
+       LASSERT(de->rec_len >= de->name_len + sizeof(struct osd_fid_pack));
+
+       rc = ldiskfs_journal_get_write_access(jh, bh);
+       if (rc != 0) {
+               CERROR("%.16s: fail to write access for update dirent: "
+                      "name = %.*s, rc = %d\n",
+                      LDISKFS_SB(sb)->s_es->s_volume_name,
+                      ent->oied_namelen, ent->oied_name, rc);
+               RETURN(rc);
+       }
+
+       rec = (struct osd_fid_pack *)(de->name + de->name_len + 1);
+       fid_cpu_to_be((struct lu_fid *)rec->fp_area, fid);
+       rc = ldiskfs_journal_dirty_metadata(jh, bh);
+       if (rc != 0)
+               CERROR("%.16s: fail to dirty metadata for update dirent: "
+                      "name = %.*s, rc = %d\n",
+                      LDISKFS_SB(sb)->s_es->s_volume_name,
+                      ent->oied_namelen, ent->oied_name, rc);
+
+       RETURN(rc);
+}
+
+static inline int
+osd_dirent_has_space(__u16 reclen, __u16 namelen, unsigned blocksize)
+{
+       if (ldiskfs_rec_len_from_disk(reclen, blocksize) >=
+           __LDISKFS_DIR_REC_LEN(namelen + 1 + sizeof(struct osd_fid_pack)))
+               return 1;
+       else
+               return 0;
+}
+
+static int
+osd_dirent_reinsert(const struct lu_env *env, handle_t *jh,
+                   struct inode *dir, struct inode *inode,
+                   struct osd_it_ea_dirent *ent, struct lu_fid *fid,
+                   struct buffer_head *bh, struct ldiskfs_dir_entry_2 *de,
+                   struct htree_lock *hlock)
+{
+       struct dentry               *dentry;
+       struct osd_fid_pack         *rec;
+       struct ldiskfs_dentry_param *ldp;
+       int                          rc;
+       ENTRY;
+
+       if (!LDISKFS_HAS_INCOMPAT_FEATURE(inode->i_sb,
+                                         LDISKFS_FEATURE_INCOMPAT_DIRDATA))
+               RETURN(0);
+
+       /* There is enough space to hold the FID-in-dirent. */
+       if (osd_dirent_has_space(de->rec_len, ent->oied_namelen,
+                                dir->i_sb->s_blocksize)) {
+               rc = ldiskfs_journal_get_write_access(jh, bh);
+               if (rc != 0) {
+                       CERROR("%.16s: fail to write access for reinsert "
+                              "dirent: name = %.*s, rc = %d\n",
+                              LDISKFS_SB(inode->i_sb)->s_es->s_volume_name,
+                              ent->oied_namelen, ent->oied_name, rc);
+                       RETURN(rc);
+               }
+
+               de->name[de->name_len] = 0;
+               rec = (struct osd_fid_pack *)(de->name + de->name_len + 1);
+               rec->fp_len = sizeof(struct lu_fid) + 1;
+               fid_cpu_to_be((struct lu_fid *)rec->fp_area, fid);
+               de->file_type |= LDISKFS_DIRENT_LUFID;
+
+               rc = ldiskfs_journal_dirty_metadata(jh, bh);
+               if (rc != 0)
+                       CERROR("%.16s: fail to dirty metadata for reinsert "
+                              "dirent: name = %.*s, rc = %d\n",
+                              LDISKFS_SB(inode->i_sb)->s_es->s_volume_name,
+                              ent->oied_namelen, ent->oied_name, rc);
+
+               RETURN(rc);
+       }
+
+       rc = ldiskfs_delete_entry(jh, dir, de, bh);
+       if (rc != 0) {
+               CERROR("%.16s: fail to delete entry for reinsert dirent: "
+                      "name = %.*s, rc = %d\n",
+                      LDISKFS_SB(inode->i_sb)->s_es->s_volume_name,
+                      ent->oied_namelen, ent->oied_name, rc);
+               RETURN(rc);
+       }
+
+       dentry = osd_child_dentry_by_inode(env, dir, ent->oied_name,
+                                          ent->oied_namelen);
+       ldp = (struct ldiskfs_dentry_param *)osd_oti_get(env)->oti_ldp;
+       osd_get_ldiskfs_dirent_param(ldp, (const struct dt_rec *)fid);
+       dentry->d_fsdata = (void *)ldp;
+       ll_vfs_dq_init(dir);
+       rc = osd_ldiskfs_add_entry(jh, dentry, inode, hlock);
+       /* It is too bad, we cannot reinsert the name entry back.
+        * That means we lose it! */
+       if (rc != 0)
+               CERROR("%.16s: fail to insert entry for reinsert dirent: "
+                      "name = %.*s, rc = %d\n",
+                      LDISKFS_SB(inode->i_sb)->s_es->s_volume_name,
+                      ent->oied_namelen, ent->oied_name, rc);
+
+       RETURN(rc);
+}
+
+static int
+osd_dirent_check_repair(const struct lu_env *env, struct osd_object *obj,
+                       struct osd_it_ea *it, struct lu_fid *fid,
+                       struct osd_inode_id *id, __u32 *attr)
+{
+       struct osd_thread_info     *info        = osd_oti_get(env);
+       struct lustre_mdt_attrs    *lma         = &info->oti_mdt_attrs;
+       struct osd_device          *dev         = osd_obj2dev(obj);
+       struct super_block         *sb          = osd_sb(dev);
+       const char                 *devname     =
+                                       LDISKFS_SB(sb)->s_es->s_volume_name;
+       struct osd_it_ea_dirent    *ent         = it->oie_dirent;
+       struct inode               *dir         = obj->oo_inode;
+       struct htree_lock          *hlock       = NULL;
+       struct buffer_head         *bh          = NULL;
+       handle_t                   *jh          = NULL;
+       struct ldiskfs_dir_entry_2 *de;
+       struct dentry              *dentry;
+       struct inode               *inode;
+       int                         credits;
+       int                         rc;
+       bool                        dirty       = false;
+       bool                        is_dotdot   = false;
+       ENTRY;
+
+       if (ent->oied_name[0] == '.') {
+               /* Skip dot entry, even if it has stale FID-in-dirent, because
+                * we do not use such FID-in-dirent anymore, it is harmless. */
+               if (ent->oied_namelen == 1)
+                       RETURN(0);
+
+               if (ent->oied_namelen == 2 && ent->oied_name[1] == '.')
+                       is_dotdot = true;
+       }
+
+       dentry = osd_child_dentry_get(env, obj, ent->oied_name,
+                                     ent->oied_namelen);
+
+       /* We need to ensure that the name entry is still valid,
+        * because it may already have been removed or renamed by others.
+        *
+        * The unlink or rename operation will start journal before PDO lock,
+        * so to avoid deadlock, here we need to start journal handle before
+        * related PDO lock also. But because we do not know whether there
+        * will be something to be repaired before PDO lock, we just start
+        * journal without conditions.
+        *
+        * We may need to remove the name entry firstly, then insert back.
+        * One credit is for user quota file update.
+        * One credit is for group quota file update.
+        * Two credits are for dirty inode. */
+       credits = osd_dto_credits_noquota[DTO_INDEX_DELETE] +
+                 osd_dto_credits_noquota[DTO_INDEX_INSERT] + 1 + 1 + 2;
+
+again:
+       if (dev->od_dirent_journal) {
+               jh = ldiskfs_journal_start_sb(sb, credits);
+               if (IS_ERR(jh)) {
+                       rc = PTR_ERR(jh);
+                       CERROR("%.16s: fail to start trans for dirent "
+                              "check_repair: credits %d, name %.*s, rc %d\n",
+                              devname, credits, ent->oied_namelen,
+                              ent->oied_name, rc);
+                       RETURN(rc);
+               }
+       }
+
+       if (obj->oo_hl_head != NULL) {
+               hlock = osd_oti_get(env)->oti_hlock;
+               ldiskfs_htree_lock(hlock, obj->oo_hl_head, dir,
+                                  LDISKFS_HLOCK_DEL);
+       } else {
+               down_write(&obj->oo_ext_idx_sem);
+       }
+
+       bh = osd_ldiskfs_find_entry(dir, dentry, &de, hlock);
+       /* For dotdot entry, if there is not enough space to hold FID-in-dirent,
+        * just keep it there. It only happens when the device upgraded from 1.8
+        * or restored from MDT file-level backup. For the whole directory, only
+        * dotdot entry has no FID-in-dirent and needs to get FID from LMA when
+        * readdir, it will not affect the performance much. */
+       if ((bh == NULL) || (le32_to_cpu(de->inode) != ent->oied_ino) ||
+           (is_dotdot && !osd_dirent_has_space(de->rec_len,
+                                               ent->oied_namelen,
+                                               sb->s_blocksize))) {
+               *attr |= LUDA_IGNORE;
+               GOTO(out_journal, rc = 0);
+       }
+
+       osd_id_gen(id, ent->oied_ino, OSD_OII_NOGEN);
+       inode = osd_iget(info, dev, id);
+       if (IS_ERR(inode)) {
+               rc = PTR_ERR(inode);
+               if (rc == -ENOENT || rc == -ESTALE) {
+                       *attr |= LUDA_IGNORE;
+                       rc = 0;
+               }
+
+               GOTO(out_journal, rc);
+       }
+
+       rc = osd_get_lma(info, inode, &info->oti_obj_dentry, lma);
+       if (rc == 0) {
+               if (fid_is_sane(fid)) {
+                       /* FID-in-dirent is valid. */
+                       if (lu_fid_eq(fid, &lma->lma_self_fid))
+                               GOTO(out_inode, rc = 0);
+
+                       /* Do not repair under dryrun mode. */
+                       if (*attr & LUDA_VERIFY_DRYRUN) {
+                               *attr |= LUDA_REPAIR;
+                               GOTO(out_inode, rc = 0);
+                       }
+
+                       if (!dev->od_dirent_journal) {
+                               iput(inode);
+                               brelse(bh);
+                               if (hlock != NULL)
+                                       ldiskfs_htree_unlock(hlock);
+                               else
+                                       up_write(&obj->oo_ext_idx_sem);
+                               dev->od_dirent_journal = 1;
+                               goto again;
+                       }
+
+                       *fid = lma->lma_self_fid;
+                       dirty = true;
+                       /* Update the FID-in-dirent. */
+                       rc = osd_dirent_update(jh, sb, ent, fid, bh, de);
+                       if (rc == 0)
+                               *attr |= LUDA_REPAIR;
+               } else {
+                       /* Do not repair under dryrun mode. */
+                       if (*attr & LUDA_VERIFY_DRYRUN) {
+                               *attr |= LUDA_REPAIR;
+                               GOTO(out_inode, rc = 0);
+                       }
+
+                       if (!dev->od_dirent_journal) {
+                               iput(inode);
+                               brelse(bh);
+                               if (hlock != NULL)
+                                       ldiskfs_htree_unlock(hlock);
+                               else
+                                       up_write(&obj->oo_ext_idx_sem);
+                               dev->od_dirent_journal = 1;
+                               goto again;
+                       }
+
+                       *fid = lma->lma_self_fid;
+                       dirty = true;
+                       /* Append the FID-in-dirent. */
+                       rc = osd_dirent_reinsert(env, jh, dir, inode, ent,
+                                                fid, bh, de, hlock);
+                       if (rc == 0)
+                               *attr |= LUDA_REPAIR;
+               }
+       } else if (rc == -ENODATA) {
+               /* Do not repair under dryrun mode. */
+               if (*attr & LUDA_VERIFY_DRYRUN) {
+                       if (fid_is_sane(fid))
+                               *attr |= LUDA_REPAIR;
+                       else
+                               *attr |= LUDA_UPGRADE;
+                       GOTO(out_inode, rc = 0);
+               }
+
+               if (!dev->od_dirent_journal) {
+                       iput(inode);
+                       brelse(bh);
+                       if (hlock != NULL)
+                               ldiskfs_htree_unlock(hlock);
+                       else
+                               up_write(&obj->oo_ext_idx_sem);
+                       dev->od_dirent_journal = 1;
+                       goto again;
+               }
+
+               dirty = true;
+               if (unlikely(fid_is_sane(fid))) {
+                       /* FID-in-dirent exists, but FID-in-LMA is lost.
+                        * Trust the FID-in-dirent, and add FID-in-LMA. */
+                       rc = osd_ea_fid_set(info, inode, fid);
+                       if (rc == 0)
+                               *attr |= LUDA_REPAIR;
+               } else {
+                       lu_igif_build(fid, inode->i_ino, inode->i_generation);
+                       /* It is probably an IGIF object. Only append the
+                        * FID-in-dirent. OI scrub will process FID-in-LMA. */
+                       rc = osd_dirent_reinsert(env, jh, dir, inode, ent,
+                                                fid, bh, de, hlock);
+                       if (rc == 0)
+                               *attr |= LUDA_UPGRADE;
+               }
+       }
+
+       GOTO(out_inode, rc);
+
+out_inode:
+       iput(inode);
+
+out_journal:
+       brelse(bh);
+       if (hlock != NULL)
+               ldiskfs_htree_unlock(hlock);
+       else
+               up_write(&obj->oo_ext_idx_sem);
+       if (jh != NULL)
+               ldiskfs_journal_stop(jh);
+       if (rc >= 0 && !dirty)
+               dev->od_dirent_journal = 0;
+       return rc;
+}
 
 /**
 
 /**
- * Returns the value (i.e. fid/igif) at current position from iterator's
- * in memory structure.
+ * Returns the value at current position from iterator's in memory structure.
  *
  * \param di struct osd_it_ea, iterator's in memory structure
  * \param attr attr requested for dirent.
  *
  * \param di struct osd_it_ea, iterator's in memory structure
  * \param attr attr requested for dirent.
@@ -4472,16 +4840,31 @@ static inline int osd_it_ea_rec(const struct lu_env *env,
        int                     rc    = 0;
        ENTRY;
 
        int                     rc    = 0;
        ENTRY;
 
-       if (!fid_is_sane(fid)) {
-               rc = osd_ea_fid_get(env, obj, ino, fid, id);
-               if (rc != 0) {
-                       fid_zero(&oic->oic_fid);
-                       RETURN(rc);
+       if (attr & LUDA_VERIFY) {
+               attr |= LUDA_TYPE;
+               if (unlikely(ino == osd_sb(dev)->s_root->d_inode->i_ino)) {
+                       attr |= LUDA_IGNORE;
+                       rc = 0;
+                       goto pack;
                }
                }
+
+               rc = osd_dirent_check_repair(env, obj, it, fid, id, &attr);
        } else {
        } else {
-               osd_id_gen(id, ino, OSD_OII_NOGEN);
+               attr &= ~LU_DIRENT_ATTRS_MASK;
+               if (!fid_is_sane(fid)) {
+                       if (OBD_FAIL_CHECK(OBD_FAIL_FID_LOOKUP))
+                               RETURN(-ENOENT);
+
+                       rc = osd_ea_fid_get(env, obj, ino, fid, id);
+               } else {
+                       osd_id_gen(id, ino, OSD_OII_NOGEN);
+               }
        }
 
        }
 
+       if (rc < 0)
+               RETURN(rc);
+
+pack:
        osd_it_pack_dirent(lde, fid, it->oie_dirent->oied_off,
                           it->oie_dirent->oied_name,
                           it->oie_dirent->oied_namelen,
        osd_it_pack_dirent(lde, fid, it->oie_dirent->oied_off,
                           it->oie_dirent->oied_name,
                           it->oie_dirent->oied_namelen,
@@ -4490,9 +4873,13 @@ static inline int osd_it_ea_rec(const struct lu_env *env,
        if (osd_remote_fid(env, dev, fid))
                RETURN(0);
 
        if (osd_remote_fid(env, dev, fid))
                RETURN(0);
 
-       oic->oic_lid = *id;
-       oic->oic_fid = *fid;
-       if ((scrub->os_pos_current <= ino) &&
+       if (likely(!(attr & LUDA_IGNORE))) {
+               oic->oic_lid = *id;
+               oic->oic_fid = *fid;
+       }
+
+       if (!(attr & LUDA_VERIFY) &&
+           (scrub->os_pos_current <= ino) &&
            ((sf->sf_flags & SF_INCONSISTENT) ||
             (sf->sf_flags & SF_UPGRADE && fid_is_igif(fid)) ||
             ldiskfs_test_bit(osd_oi_fid2idx(dev, fid), sf->sf_oi_bitmap)))
            ((sf->sf_flags & SF_INCONSISTENT) ||
             (sf->sf_flags & SF_UPGRADE && fid_is_igif(fid)) ||
             ldiskfs_test_bit(osd_oi_fid2idx(dev, fid), sf->sf_oi_bitmap)))
index d4fe344..19560c5 100644 (file)
@@ -267,6 +267,7 @@ struct osd_device {
        spinlock_t                od_osfs_lock;
 
        unsigned int              od_noscrub:1,
        spinlock_t                od_osfs_lock;
 
        unsigned int              od_noscrub:1,
+                                 od_dirent_journal:1,
                                  od_handle_nolma:1;
 
        struct fsfilt_operations *od_fsops;
                                  od_handle_nolma:1;
 
        struct fsfilt_operations *od_fsops;
index 464c132..858aa07 100644 (file)
@@ -417,7 +417,8 @@ osd_scrub_check_update(struct osd_thread_info *info, struct osd_device *dev,
        if (fid_is_igif(fid))
                sf->sf_items_igif++;
 
        if (fid_is_igif(fid))
                sf->sf_items_igif++;
 
-       if (val == SCRUB_NEXT_NOLMA && !dev->od_handle_nolma)
+       if ((val == SCRUB_NEXT_NOLMA) &&
+           (!dev->od_handle_nolma || OBD_FAIL_CHECK(OBD_FAIL_FID_NOLMA)))
                GOTO(out, rc = 0);
 
        if ((oii != NULL && oii->oii_insert) || (val == SCRUB_NEXT_NOLMA))
                GOTO(out, rc = 0);
 
        if ((oii != NULL && oii->oii_insert) || (val == SCRUB_NEXT_NOLMA))
index 3a550cf..6163542 100644 (file)
@@ -21,6 +21,7 @@ init_logging
        skip "test LFSCK only for ldiskfs" && exit 0
 require_dsh_mds || exit 0
 
        skip "test LFSCK only for ldiskfs" && exit 0
 require_dsh_mds || exit 0
 
+MCREATE=${MCREATE:-mcreate}
 SAVED_MDSSIZE=${MDSSIZE}
 SAVED_OSTSIZE=${OSTSIZE}
 # use small MDS + OST size to speed formatting time
 SAVED_MDSSIZE=${MDSSIZE}
 SAVED_OSTSIZE=${OSTSIZE}
 # use small MDS + OST size to speed formatting time
@@ -114,6 +115,133 @@ test_0() {
 }
 run_test 0 "Control LFSCK manually"
 
 }
 run_test 0 "Control LFSCK manually"
 
+# test_1a: create one file while fail_loc OBD_FAIL_FID_INDIR is set on the
+# MDS, run the namespace LFSCK, and check that it reports exactly one
+# repaired item and that the directory can be listed afterwards.
+test_1a() {
+       # lfsck_prep, start, mount_client, error, do_facet are helpers
+       # defined outside this function.
+       lfsck_prep 1 1
+       echo "start $SINGLEMDS"
+       start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
+               error "(1) Fail to start MDS!"
+
+       mount_client $MOUNT || error "(2) Fail to start client!"
+
+       # Inject the failure only around the creation of "dummy".
+       # Presumably this makes the MDS store a bad FID-in-dirent for the
+       # new entry -- TODO confirm against the OBD_FAIL_FID_INDIR user.
+       #define OBD_FAIL_FID_INDIR      0x1501
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1501
+       touch $DIR/$tdir/dummy
+
+       # Clear the injection and unmount the client before starting LFSCK.
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+       umount_client $MOUNT
+       $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!"
+
+       # NOTE(review): fixed 3 second wait; assumes the scan of this small
+       # filesystem has finished by then.
+       sleep 3
+       local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+       [ "$STATUS" == "completed" ] ||
+               error "(4) Expect 'completed', but got '$STATUS'"
+
+       # Exactly one item (the "dummy" entry) is expected in the
+       # updated_phase1 counter.
+       local repaired=$($SHOW_NAMESPACE |
+                        awk '/^updated_phase1/ { print $2 }')
+       [ $repaired -eq 1 ] ||
+               error "(5) Fail to repair crashed FID-in-dirent: $repaired"
+
+       mount_client $MOUNT || error "(6) Fail to start client!"
+
+       # Per the osd_handler.c hunk of this patch, with OBD_FAIL_FID_LOOKUP
+       # set the directory iterator returns -ENOENT for an entry whose FID
+       # is not sane instead of falling back to osd_ea_fid_get(), so a
+       # successful ls indicates the FID-in-dirent is present.
+       #define OBD_FAIL_FID_LOOKUP     0x1505
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
+       ls $DIR/$tdir/ > /dev/null || error "(7) no FID-in-dirent."
+
+       # Always leave fail_loc cleared for the following tests.
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+}
+run_test 1a "LFSCK can find out and repair crashed FID-in-dirent"
+
+# test_1b: create one file while fail_loc OBD_FAIL_FID_INLMA is set on the
+# MDS, run the namespace LFSCK with OBD_FAIL_FID_NOLMA set, and check that it
+# reports exactly one repaired item and that the file can be stat'ed.
+test_1b()
+{
+       # lfsck_prep, start, mount_client, error, do_facet are helpers
+       # defined outside this function.
+       lfsck_prep 1 1
+       echo "start $SINGLEMDS"
+       start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null ||
+               error "(1) Fail to start MDS!"
+
+       mount_client $MOUNT || error "(2) Fail to start client!"
+
+       # Inject the failure only around the creation of "dummy".
+       # Presumably this makes the MDS create the object without its
+       # FID-in-LMA -- TODO confirm against the OBD_FAIL_FID_INLMA user.
+       #define OBD_FAIL_FID_INLMA      0x1502
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1502
+       touch $DIR/$tdir/dummy
+
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+       umount_client $MOUNT
+       # Per the osd_scrub.c hunk of this patch, OBD_FAIL_FID_NOLMA makes
+       # osd_scrub_check_update() bail out with rc = 0 for
+       # SCRUB_NEXT_NOLMA items, so OI scrub does not handle them while
+       # the namespace LFSCK below is running.
+       #define OBD_FAIL_FID_NOLMA      0x1506
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1506
+       $START_NAMESPACE || error "(3) Fail to start LFSCK for namespace!"
+
+       # NOTE(review): fixed 3 second wait; assumes the scan of this small
+       # filesystem has finished by then.
+       sleep 3
+       local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+       [ "$STATUS" == "completed" ] ||
+               error "(4) Expect 'completed', but got '$STATUS'"
+
+       # Exactly one item (the "dummy" object) is expected in the
+       # updated_phase1 counter.
+       local repaired=$($SHOW_NAMESPACE |
+                        awk '/^updated_phase1/ { print $2 }')
+       [ $repaired -eq 1 ] ||
+               error "(5) Fail to repair missed FID-in-LMA: $repaired"
+
+       # Clear OBD_FAIL_FID_NOLMA before remounting the client.
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+       mount_client $MOUNT || error "(6) Fail to start client!"
+
+       # stat must succeed with OBD_FAIL_FID_LOOKUP set.
+       # NOTE(review): the lookup path that checks this fail_loc is not
+       # part of the hunks shown here -- confirm how it relates to LMA.
+       #define OBD_FAIL_FID_LOOKUP     0x1505
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
+       stat $DIR/$tdir/dummy > /dev/null || error "(7) no FID-in-LMA."
+
+       # Always leave fail_loc cleared for the following tests.
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+}
+run_test 1b "LFSCK can find out and repair missed FID-in-LMA"
+
+# test_4: after mds_backup_restore, start the MDS with OI scrub disabled,
+# run the namespace LFSCK, observe it first in "scanning-phase1" with the
+# "inconsistent" flag and then "completed" with no flags, and check that at
+# least 9 items were repaired and the directory can be listed.
+test_4()
+{
+       # lfsck_prep, mds_backup_restore, start, mount_client, error and
+       # do_facet are helpers defined outside this function.
+       lfsck_prep 3 3
+       mds_backup_restore || error "(1) Fail to backup/restore!"
+       echo "start $SINGLEMDS with disabling OI scrub"
+       start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_NOSCRUB > /dev/null ||
+               error "(2) Fail to start MDS!"
+
+       # Before LFSCK has ever been started the status must be "init".
+       local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+       [ "$STATUS" == "init" ] ||
+               error "(3) Expect 'init', but got '$STATUS'"
+
+       # Presumably fail_val/fail_loc 0x1601 slow the scan down so that the
+       # intermediate state can be sampled below -- TODO confirm against
+       # the OBD_FAIL_LFSCK_DELAY2 user.
+       #define OBD_FAIL_LFSCK_DELAY2           0x1601
+       do_facet $SINGLEMDS $LCTL set_param fail_val=1
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1601
+       $START_NAMESPACE || error "(4) Fail to start LFSCK for namespace!"
+
+       # After 5 seconds the scan is expected to still be in phase 1 ...
+       sleep 5
+       STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+       [ "$STATUS" == "scanning-phase1" ] ||
+               error "(5) Expect 'scanning-phase1', but got '$STATUS'"
+
+       # ... and to have flagged the namespace as inconsistent.
+       local FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
+       [ "$FLAGS" == "inconsistent" ] ||
+               error "(6) Expect 'inconsistent', but got '$FLAGS'"
+
+       # Remove the injection; the scan is then expected to complete
+       # within the fixed 3 second wait.
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+       do_facet $SINGLEMDS $LCTL set_param fail_val=0
+       sleep 3
+       STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
+       [ "$STATUS" == "completed" ] ||
+               error "(7) Expect 'completed', but got '$STATUS'"
+
+       # No flags may remain once the scan has completed.
+       FLAGS=$($SHOW_NAMESPACE | awk '/^flags/ { print $2 }')
+       [ -z "$FLAGS" ] || error "(8) Expect empty flags, but got '$FLAGS'"
+
+       # NOTE(review): message (9) mentions linkEA although this test is
+       # registered as a FID-in-dirent rebuild test -- confirm the wording.
+       local repaired=$($SHOW_NAMESPACE |
+                        awk '/^updated_phase1/ { print $2 }')
+       [ $repaired -ge 9 ] ||
+               error "(9) Fail to repair crashed linkEA: $repaired"
+
+       mount_client $MOUNT || error "(10) Fail to start client!"
+
+       # Per the osd_handler.c hunk of this patch, with OBD_FAIL_FID_LOOKUP
+       # set the directory iterator returns -ENOENT for an entry whose FID
+       # is not sane instead of falling back to osd_ea_fid_get(), so a
+       # successful ls indicates the FID-in-dirent has been rebuilt.
+       #define OBD_FAIL_FID_LOOKUP     0x1505
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1505
+       ls $DIR/$tdir/ > /dev/null || error "(11) no FID-in-dirent."
+
+       # Always leave fail_loc cleared for the following tests.
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+}
+run_test 4 "FID-in-dirent can be rebuilt after MDT file-level backup/restore"
+
 test_6a() {
        lfsck_prep 10 10
        echo "start $SINGLEMDS"
 test_6a() {
        lfsck_prep 10 10
        echo "start $SINGLEMDS"