Whamcloud - gitweb
LU-3335 osd: self repair LMA by the RPC service thread 44/7144/10
author Fan Yong <fan.yong@intel.com>
Wed, 4 Sep 2013 14:32:51 +0000 (22:32 +0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Wed, 6 Nov 2013 06:59:43 +0000 (06:59 +0000)
The current OI scrub uses the FID in the LMA to locate the OI mapping
and repair it if it is inconsistent. Such inode-table-based scanning
does not guarantee that every OI inconsistency can be repaired: it
cannot handle an inode without an LMA or with a corrupted LMA, and it
cannot find dummy OI mappings (those whose object does not exist).

In fact, after the inode-table-based scan, the OI scrub could perform
another, OI-table-based scan to resolve the issues mentioned above.
But such a scan would be nonlinear, so its performance may be bad.

For now, there is a simpler alternative, although it is not perfect:
let the RPC service thread fix the inconsistency it finds by itself.
When it verifies the LMA against the given FID and finds no LMA, it
generates an LMA from the given FID and stores the new LMA as an
inode xattr. It is rare for an inode to have no LMA, so this
additional work in the RPC service thread will not affect
performance much.

Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: Ic6b8b533e56b6a0e388d5c38de7b113cb9bd6dad
Reviewed-on: http://review.whamcloud.com/7144
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Mike Pershin <mike.pershin@intel.com>
lustre/osd-ldiskfs/osd_handler.c
lustre/osd-ldiskfs/osd_internal.h
lustre/osd-ldiskfs/osd_lproc.c

index 241fc19..096dac0 100644 (file)
@@ -319,6 +319,7 @@ int osd_get_idif(struct osd_thread_info *info, struct inode *inode,
 static int osd_check_lma(const struct lu_env *env, struct osd_object *obj)
 {
        struct osd_thread_info  *info   = osd_oti_get(env);
 static int osd_check_lma(const struct lu_env *env, struct osd_object *obj)
 {
        struct osd_thread_info  *info   = osd_oti_get(env);
+       struct osd_device       *osd    = osd_obj2dev(obj);
        struct lustre_mdt_attrs *lma    = &info->oti_mdt_attrs;
        struct inode            *inode  = obj->oo_inode;
        struct dentry           *dentry = &info->oti_obj_dentry;
        struct lustre_mdt_attrs *lma    = &info->oti_mdt_attrs;
        struct inode            *inode  = obj->oo_inode;
        struct dentry           *dentry = &info->oti_obj_dentry;
@@ -333,11 +334,41 @@ static int osd_check_lma(const struct lu_env *env, struct osd_object *obj)
        rc = __osd_xattr_get(inode, dentry, XATTR_NAME_LMA,
                             info->oti_mdt_attrs_old, LMA_OLD_SIZE);
        if (rc == -ENODATA && !fid_is_igif(lu_object_fid(&obj->oo_dt.do_lu)) &&
        rc = __osd_xattr_get(inode, dentry, XATTR_NAME_LMA,
                             info->oti_mdt_attrs_old, LMA_OLD_SIZE);
        if (rc == -ENODATA && !fid_is_igif(lu_object_fid(&obj->oo_dt.do_lu)) &&
-           osd_obj2dev(obj)->od_check_ff) {
+           osd->od_check_ff) {
                fid = &lma->lma_self_fid;
                rc = osd_get_idif(info, inode, dentry, fid);
                fid = &lma->lma_self_fid;
                rc = osd_get_idif(info, inode, dentry, fid);
-               if (rc > 0)
+               if ((rc > 0) || (rc == -ENODATA && osd->od_lma_self_repair)) {
+                       handle_t *jh;
+
+                       /* For the given OST-object, if it has neither LMA nor
+                        * FID in XATTR_NAME_FID, then the given FID (which is
+                        * contained in the @obj, from client RPC for locating
+                        * the OST-object) is trusted. We use it to generate
+                        * the LMA. */
+
+                       LASSERT(current->journal_info == NULL);
+
+                       jh = ldiskfs_journal_start_sb(osd_sb(osd),
+                                       osd_dto_credits_noquota[DTO_XATTR_SET]);
+                       if (IS_ERR(jh)) {
+                               CWARN("%s: cannot start journal for "
+                                     "lma_self_repair: rc = %ld\n",
+                                     osd_name(osd), PTR_ERR(jh));
+                               RETURN(0);
+                       }
+
+                       rc = osd_ea_fid_set(info, inode,
+                               lu_object_fid(&obj->oo_dt.do_lu),
+                               fid_is_on_ost(info, osd,
+                                             lu_object_fid(&obj->oo_dt.do_lu),
+                                             OI_CHECK_FLD) ?
+                               LMAC_FID_ON_OST : 0, 0);
+                       if (rc != 0)
+                               CWARN("%s: cannot self repair the LMA: "
+                                     "rc = %d\n", osd_name(osd), rc);
+                       ldiskfs_journal_stop(jh);
                        RETURN(0);
                        RETURN(0);
+               }
        }
 
        if (unlikely(rc == -ENODATA))
        }
 
        if (unlikely(rc == -ENODATA))
@@ -352,8 +383,7 @@ static int osd_check_lma(const struct lu_env *env, struct osd_object *obj)
                if (unlikely((lma->lma_incompat & ~LMA_INCOMPAT_SUPP) ||
                             CFS_FAIL_CHECK(OBD_FAIL_OSD_LMA_INCOMPAT))) {
                        CWARN("%s: unsupported incompat LMA feature(s) %#x for "
                if (unlikely((lma->lma_incompat & ~LMA_INCOMPAT_SUPP) ||
                             CFS_FAIL_CHECK(OBD_FAIL_OSD_LMA_INCOMPAT))) {
                        CWARN("%s: unsupported incompat LMA feature(s) %#x for "
-                             "fid = "DFID", ino = %lu\n",
-                             osd_obj2dev(obj)->od_svname,
+                             "fid = "DFID", ino = %lu\n", osd_name(osd),
                              lma->lma_incompat & ~LMA_INCOMPAT_SUPP,
                              PFID(lu_object_fid(&obj->oo_dt.do_lu)),
                              inode->i_ino);
                              lma->lma_incompat & ~LMA_INCOMPAT_SUPP,
                              PFID(lu_object_fid(&obj->oo_dt.do_lu)),
                              inode->i_ino);
@@ -366,8 +396,7 @@ static int osd_check_lma(const struct lu_env *env, struct osd_object *obj)
        if (fid != NULL &&
            unlikely(!lu_fid_eq(lu_object_fid(&obj->oo_dt.do_lu), fid))) {
                CDEBUG(D_INODE, "%s: FID "DFID" != self_fid "DFID"\n",
        if (fid != NULL &&
            unlikely(!lu_fid_eq(lu_object_fid(&obj->oo_dt.do_lu), fid))) {
                CDEBUG(D_INODE, "%s: FID "DFID" != self_fid "DFID"\n",
-                      osd_obj2dev(obj)->od_svname,
-                      PFID(lu_object_fid(&obj->oo_dt.do_lu)),
+                      osd_name(osd), PFID(lu_object_fid(&obj->oo_dt.do_lu)),
                       PFID(&lma->lma_self_fid));
                rc = -EREMCHG;
        }
                       PFID(&lma->lma_self_fid));
                rc = -EREMCHG;
        }
@@ -5582,6 +5611,9 @@ static int osd_device_init0(const struct lu_env *env,
        if (rc != 0)
                GOTO(out_site, rc);
 
        if (rc != 0)
                GOTO(out_site, rc);
 
+       /* self-repair LMA by default */
+       o->od_lma_self_repair = 1;
+
        CFS_INIT_LIST_HEAD(&o->od_ios_list);
        /* setup scrub, including OI files initialization */
        rc = osd_scrub_setup(env, o);
        CFS_INIT_LIST_HEAD(&o->od_ios_list);
        /* setup scrub, including OI files initialization */
        rc = osd_scrub_setup(env, o);
index c5440ae..50539a6 100644 (file)
@@ -240,7 +240,8 @@ struct osd_device {
                                  od_dirent_journal:1,
                                  od_igif_inoi:1,
                                  od_check_ff:1,
                                  od_dirent_journal:1,
                                  od_igif_inoi:1,
                                  od_check_ff:1,
-                                 od_is_ost:1;
+                                 od_is_ost:1,
+                                 od_lma_self_repair:1;
 
         unsigned long             od_capa_timeout;
         __u32                     od_capa_alg;
 
         unsigned long             od_capa_timeout;
         __u32                     od_capa_alg;
index 5171863..83bb586 100644 (file)
@@ -514,6 +514,38 @@ int lprocfs_osd_wr_readcache(struct file *file, const char *buffer,
        return count;
 }
 
        return count;
 }
 
+static int lprocfs_osd_rd_lma_self_repair(char *page, char **start, off_t off,
+                                         int count, int *eof, void *data)
+{
+       struct osd_device *dev = osd_dt_dev(data);
+
+       LASSERT(dev != NULL);
+       if (unlikely(dev->od_mnt == NULL))
+               return -EINPROGRESS;
+
+       *eof = 1;
+       return snprintf(page, count, "%d\n", !!dev->od_lma_self_repair);
+}
+
+static int lprocfs_osd_wr_lma_self_repair(struct file *file, const char *buffer,
+                                         unsigned long count, void *data)
+{
+       struct osd_device *dev = osd_dt_dev(data);
+       int                val;
+       int                rc;
+
+       LASSERT(dev != NULL);
+       if (unlikely(dev->od_mnt == NULL))
+               return -EINPROGRESS;
+
+       rc = lprocfs_write_helper(buffer, count, &val);
+       if (rc)
+               return rc;
+
+       dev->od_lma_self_repair = !!val;
+       return count;
+}
+
 struct lprocfs_vars lprocfs_osd_obd_vars[] = {
        { "blocksize",          lprocfs_dt_rd_blksize,  0, 0 },
        { "kbytestotal",        lprocfs_dt_rd_kbytestotal,      0, 0 },
 struct lprocfs_vars lprocfs_osd_obd_vars[] = {
        { "blocksize",          lprocfs_dt_rd_blksize,  0, 0 },
        { "kbytestotal",        lprocfs_dt_rd_kbytestotal,      0, 0 },
@@ -534,6 +566,8 @@ struct lprocfs_vars lprocfs_osd_obd_vars[] = {
                                        lprocfs_osd_wr_wcache, 0 },
        { "readcache_max_filesize",     lprocfs_osd_rd_readcache,
                                        lprocfs_osd_wr_readcache, 0 },
                                        lprocfs_osd_wr_wcache, 0 },
        { "readcache_max_filesize",     lprocfs_osd_rd_readcache,
                                        lprocfs_osd_wr_readcache, 0 },
+       { "lma_self_repair",    lprocfs_osd_rd_lma_self_repair,
+                               lprocfs_osd_wr_lma_self_repair, 0, 0 },
        { 0 }
 };
 
        { 0 }
 };